From 6a195491b6028185c7278718ac21bca309a6c4ea Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Mon, 11 Jan 2021 14:20:36 +0100 Subject: [PATCH 01/86] [AMDGPU] Fix failing assert with scratch ST mode In ST mode, flat scratch instructions have neither an sgpr nor a vgpr for the address. This led to an assertion when inserting hard clauses. Differential Revision: https://reviews.llvm.org/D94406 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 7 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 14 +- llvm/test/CodeGen/AMDGPU/memory_clause.ll | 278 ++++++++++++++++++++ 3 files changed, 291 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 1cf205f9f5a3f4..36afdefd27b2af 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1298,10 +1298,11 @@ class TargetInstrInfo : public MCInstrInfo { bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const; - /// Get the base operands and byte offset of an instruction that reads/writes - /// memory. + /// Get zero or more base operands and the byte offset of an instruction that + /// reads/writes memory. Note that there may be zero base operands if the + /// instruction accesses a constant address. /// It returns false if MI does not read/write memory. - /// It returns false if no base operands and offset was found. + /// It returns false if base operands and offset could not be determined. /// It is not guaranteed to always recognize base operands and offsets in all /// cases. 
virtual bool getMemOperandsWithOffsetWidth( diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index eebee8e16bc356..6bf9db3f7b2c2a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -387,7 +387,7 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth( } if (isFLAT(LdSt)) { - // Instructions have either vaddr or saddr or both. + // Instructions have either vaddr or saddr or both or none. BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (BaseOp) BaseOps.push_back(BaseOp); @@ -443,11 +443,15 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef BaseOps1, unsigned NumBytes) const { // If the mem ops (to be clustered) do not have the same base ptr, then they // should not be clustered - assert(!BaseOps1.empty() && !BaseOps2.empty()); - const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); - const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); - if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) + if (!BaseOps1.empty() && !BaseOps2.empty()) { + const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); + const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); + if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) + return false; + } else if (!BaseOps1.empty() || !BaseOps2.empty()) { + // If only one base op is empty, they do not have the same base ptr return false; + } // In order to avoid regester pressure, on an average, the number of DWORDS // loaded together by all clustered mem ops should not exceed 8. 
This is an diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll index 2c5931ef57b635..154d8e3320ea83 100644 --- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx1030 -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SCRATCH %s define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) { ; GCN-LABEL: vector_clause: @@ -21,6 +22,31 @@ define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocaptu ; GCN-NEXT: s_waitcnt vmcnt(3) ; GCN-NEXT: global_store_dwordx4 v16, v[12:15], s[4:5] offset:48 ; GCN-NEXT: s_endpgm +; +; GCN-SCRATCH-LABEL: vector_clause: +; GCN-SCRATCH: ; %bb.0: ; %bb +; GCN-SCRATCH-NEXT: s_add_u32 s2, s2, s5 +; GCN-SCRATCH-NEXT: s_addc_u32 s3, s3, 0 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GCN-SCRATCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v16, 4, v0 +; GCN-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GCN-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) +; GCN-SCRATCH-NEXT: s_clause 0x3 +; GCN-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v16, s[2:3] +; GCN-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v16, s[2:3] offset:16 +; GCN-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v16, s[2:3] offset:32 +; GCN-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v16, s[2:3] offset:48 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(3) +; GCN-SCRATCH-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(2) +; GCN-SCRATCH-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; 
GCN-SCRATCH-NEXT: s_waitcnt vmcnt(1) +; GCN-SCRATCH-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GCN-SCRATCH-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GCN-SCRATCH-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = zext i32 %tmp to i64 @@ -79,6 +105,45 @@ define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocaptu ; GCN-NEXT: v_mov_b32_e32 v3, s15 ; GCN-NEXT: global_store_dwordx4 v12, v[0:3], s[18:19] offset:48 ; GCN-NEXT: s_endpgm +; +; GCN-SCRATCH-LABEL: scalar_clause: +; GCN-SCRATCH: ; %bb.0: ; %bb +; GCN-SCRATCH-NEXT: s_add_u32 s2, s2, s5 +; GCN-SCRATCH-NEXT: s_addc_u32 s3, s3, 0 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GCN-SCRATCH-NEXT: s_clause 0x1 +; GCN-SCRATCH-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x24 +; GCN-SCRATCH-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x2c +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v16, 0 +; GCN-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) +; GCN-SCRATCH-NEXT: s_clause 0x3 +; GCN-SCRATCH-NEXT: s_load_dwordx4 s[0:3], s[12:13], 0x0 +; GCN-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[12:13], 0x10 +; GCN-SCRATCH-NEXT: s_load_dwordx4 s[8:11], s[12:13], 0x20 +; GCN-SCRATCH-NEXT: s_load_dwordx4 s[12:15], s[12:13], 0x30 +; GCN-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v0, s0 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v4, s4 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v1, s1 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v2, s2 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v3, s3 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v8, s8 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v5, s5 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v6, s6 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v7, s7 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v12, s12 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v9, s9 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v10, s10 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v11, s11 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v13, s13 +; 
GCN-SCRATCH-NEXT: v_mov_b32_e32 v14, s14 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v15, s15 +; GCN-SCRATCH-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] +; GCN-SCRATCH-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16 +; GCN-SCRATCH-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32 +; GCN-SCRATCH-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48 +; GCN-SCRATCH-NEXT: s_endpgm bb: %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %arg, align 16 %tmp2 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 1 @@ -143,6 +208,30 @@ define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %ar ; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:48 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GCN-SCRATCH-LABEL: mubuf_clause: +; GCN-SCRATCH: ; %bb.0: ; %bb +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: v_and_b32_e32 v2, 0x3ff, v2 +; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v18, 4, v2 +; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v0, v18 +; GCN-SCRATCH-NEXT: s_clause 0x3 +; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[2:5], v0, off +; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[6:9], v0, off offset:16 +; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[10:13], v0, off offset:32 +; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[14:17], v0, off offset:48 +; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v1, v18 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(3) +; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[2:5], off +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(2) +; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:16 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(1) +; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:32 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:48 +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31] bb: %tmp = tail call 
i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* %arg, i32 %tmp @@ -184,6 +273,28 @@ define amdgpu_kernel void @vector_clause_indirect(i64 addrspace(1)* noalias noca ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: global_store_dwordx4 v8, v[4:7], s[4:5] offset:16 ; GCN-NEXT: s_endpgm +; +; GCN-SCRATCH-LABEL: vector_clause_indirect: +; GCN-SCRATCH: ; %bb.0: ; %bb +; GCN-SCRATCH-NEXT: s_add_u32 s2, s2, s5 +; GCN-SCRATCH-NEXT: s_addc_u32 s3, s3, 0 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GCN-SCRATCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GCN-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v8, 0 +; GCN-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) +; GCN-SCRATCH-NEXT: global_load_dwordx2 v[4:5], v0, s[2:3] +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GCN-SCRATCH-NEXT: s_clause 0x1 +; GCN-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[4:5], off +; GCN-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v[4:5], off offset:16 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(1) +; GCN-SCRATCH-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GCN-SCRATCH-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GCN-SCRATCH-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp3 = zext i32 %tmp to i64 @@ -213,6 +324,21 @@ define void @load_global_d16_hi(i16 addrspace(1)* %in, i16 %reg, <2 x i16> addrs ; GCN-NEXT: global_store_dword v[3:4], v2, off offset:128 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GCN-SCRATCH-LABEL: load_global_d16_hi: +; GCN-SCRATCH: ; %bb.0: ; %entry +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v6, v2 +; GCN-SCRATCH-NEXT: s_clause 0x1 +; GCN-SCRATCH-NEXT: global_load_short_d16_hi v6, 
v[0:1], off +; GCN-SCRATCH-NEXT: global_load_short_d16_hi v2, v[0:1], off offset:64 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(1) +; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v6, off +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v2, off offset:128 +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 32 %load1 = load i16, i16 addrspace(1)* %in @@ -241,6 +367,21 @@ define void @load_global_d16_lo(i16 addrspace(1)* %in, i32 %reg, <2 x i16> addrs ; GCN-NEXT: global_store_dword v[3:4], v2, off offset:128 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GCN-SCRATCH-LABEL: load_global_d16_lo: +; GCN-SCRATCH: ; %bb.0: ; %entry +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v6, v2 +; GCN-SCRATCH-NEXT: s_clause 0x1 +; GCN-SCRATCH-NEXT: global_load_short_d16 v6, v[0:1], off +; GCN-SCRATCH-NEXT: global_load_short_d16 v2, v[0:1], off offset:64 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(1) +; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v6, off +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v2, off offset:128 +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 32 %reg.bc1 = bitcast i32 %reg to <2 x i16> @@ -255,4 +396,141 @@ entry: ret void } +define amdgpu_kernel void @flat_scratch_load(float %a, float %b, <8 x i32> %desc) { +; GCN-LABEL: flat_scratch_load: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: s_mov_b32 s16, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s17, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s18, -1 +; GCN-NEXT: s_mov_b32 s19, 0xe00000 +; GCN-NEXT: s_add_u32 s16, s16, s3 +; GCN-NEXT: s_addc_u32 s17, s17, 0 +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, 
exec +; GCN-NEXT: v_mov_b32_e32 v0, 0x40b00000 +; GCN-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x24 +; GCN-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x44 +; GCN-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:4 +; GCN-NEXT: s_brev_b32 s0, 1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s14 +; GCN-NEXT: s_mov_b32 s3, 0 +; GCN-NEXT: s_mov_b32 s1, s0 +; GCN-NEXT: s_mov_b32 s2, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s15 +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample v0, v[0:1], s[4:11], s[0:3] dmask:0x1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_f32_e32 v0, v2, v0 +; GCN-NEXT: exp mrt0 v0, off, off, off done vm +; GCN-NEXT: s_endpgm +; +; GCN-SCRATCH-LABEL: flat_scratch_load: +; GCN-SCRATCH: ; %bb.0: ; %.entry +; GCN-SCRATCH-NEXT: s_add_u32 s2, s2, s5 +; GCN-SCRATCH-NEXT: s_addc_u32 s3, s3, 0 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GCN-SCRATCH-NEXT: s_mov_b32 s9, exec_lo +; GCN-SCRATCH-NEXT: s_wqm_b32 exec_lo, exec_lo +; GCN-SCRATCH-NEXT: s_clause 0x1 +; GCN-SCRATCH-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x24 +; GCN-SCRATCH-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x44 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40b00000 +; GCN-SCRATCH-NEXT: s_brev_b32 s8, 1 +; GCN-SCRATCH-NEXT: scratch_store_dword off, v0, off offset:4 +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: ;;#ASMSTART +; GCN-SCRATCH-NEXT: ;;#ASMEND +; GCN-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v0, s10 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v1, s11 +; GCN-SCRATCH-NEXT: s_and_b32 exec_lo, exec_lo, s9 +; GCN-SCRATCH-NEXT: s_mov_b32 s11, 0 +; GCN-SCRATCH-NEXT: s_mov_b32 s9, s8 +; GCN-SCRATCH-NEXT: s_mov_b32 s10, s8 +; GCN-SCRATCH-NEXT: scratch_load_dword v2, off, off offset:4 +; GCN-SCRATCH-NEXT: 
image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GCN-SCRATCH-NEXT: v_add_f32_e32 v0, v2, v0 +; GCN-SCRATCH-NEXT: exp mrt0 v0, off, off, off done vm +; GCN-SCRATCH-NEXT: s_endpgm +.entry: + %alloca = alloca float, align 4, addrspace(5) + store volatile float 5.5, float addrspace(5)* %alloca + call void asm sideeffect "", ""() + ; There was a bug with flat scratch instructions that do not not use any address registers (ST mode). + ; To trigger, the scratch_load has to be immediately before the image_sample in MIR. + %load = load float, float addrspace(5)* %alloca + %val = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 9, float %a, float %b, <8 x i32> %desc, <4 x i32> , i1 false, i32 0, i32 0) + %val0 = extractelement <2 x float> %val, i32 0 + %valadd = fadd float %load, %val0 + call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 1, float %valadd, float undef, float undef, float undef, i1 immarg true, i1 immarg true) + ret void +} + +define amdgpu_kernel void @flat_scratch_load_clause(float %a, float %b, <8 x i32> %desc) { +; GCN-LABEL: flat_scratch_load_clause: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s7, 0xe00000 +; GCN-NEXT: s_add_u32 s4, s4, s3 +; GCN-NEXT: s_addc_u32 s5, s5, 0 +; GCN-NEXT: v_mov_b32_e32 v0, 0x40b00000 +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 0x40d00000 +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0 offset:4 +; GCN-NEXT: buffer_load_dword v1, off, s[4:7], 0 offset:8 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_f32_e32 v0, v0, v1 +; GCN-NEXT: exp mrt0 v0, off, off, off done vm +; GCN-NEXT: s_endpgm +; +; 
GCN-SCRATCH-LABEL: flat_scratch_load_clause: +; GCN-SCRATCH: ; %bb.0: ; %.entry +; GCN-SCRATCH-NEXT: s_add_u32 s2, s2, s5 +; GCN-SCRATCH-NEXT: s_addc_u32 s3, s3, 0 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GCN-SCRATCH-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40b00000 +; GCN-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40d00000 +; GCN-SCRATCH-NEXT: scratch_store_dword off, v0, off offset:4 +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: scratch_store_dword off, v1, off offset:8 +; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-SCRATCH-NEXT: ;;#ASMSTART +; GCN-SCRATCH-NEXT: ;;#ASMEND +; GCN-SCRATCH-NEXT: s_clause 0x1 +; GCN-SCRATCH-NEXT: scratch_load_dword v0, off, off offset:4 +; GCN-SCRATCH-NEXT: scratch_load_dword v1, off, off offset:8 +; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GCN-SCRATCH-NEXT: v_add_f32_e32 v0, v0, v1 +; GCN-SCRATCH-NEXT: exp mrt0 v0, off, off, off done vm +; GCN-SCRATCH-NEXT: s_endpgm +.entry: + %alloca = alloca float, align 4, addrspace(5) + %alloca2 = alloca float, align 4, addrspace(5) + store volatile float 5.5, float addrspace(5)* %alloca + store volatile float 6.5, float addrspace(5)* %alloca2 + call void asm sideeffect "", ""() + %load0 = load float, float addrspace(5)* %alloca + %load1 = load float, float addrspace(5)* %alloca2 + %valadd = fadd float %load0, %load1 + call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 1, float %valadd, float undef, float undef, float undef, i1 immarg true, i1 immarg true) + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) +declare <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) From 7ab803095ae58445996dc4694acb216e3a32ee64 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 12 Jan 2021 09:09:06 +0100 Subject: 
[PATCH 02/86] [clang][cli] Remove -f[no-]trapping-math from -cc1 command line This patch removes the -f[no-]trapping-math flags from the -cc1 command line. These flags are ignored in the command line parser and their semantics is fully handled by -ffp-exception-mode. This patch does not remove -f[no-]trapping-math from the driver command line. The driver flags are being used and do affect compilation. Reviewed By: dexonsmith, SjoerdMeijer Differential Revision: https://reviews.llvm.org/D93395 --- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Driver/ToolChains/Clang.cpp | 4 +--- clang/lib/Frontend/CompilerInvocation.cpp | 8 -------- clang/test/CodeGen/fpconstrained.c | 4 ++-- clang/test/CodeGen/fpconstrained.cpp | 4 ++-- clang/test/CodeGen/noexceptionsfpmath.c | 2 +- clang/test/CodeGenCUDA/propagate-metadata.cu | 12 ++++-------- clang/test/Driver/fast-math.c | 4 ---- clang/test/Driver/fp-model.c | 8 -------- clang/test/Parser/fp-floatcontrol-syntax.cpp | 4 ++-- 10 files changed, 14 insertions(+), 40 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b18c89931ceed3..35643701f97e4e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1600,8 +1600,8 @@ def frounding_math : Flag<["-"], "frounding-math">, Group, Flags<[CC1Op MarshallingInfoFlag<"LangOpts->FPRoundingMode", "llvm::RoundingMode::NearestTiesToEven">, Normalizer<"makeFlagToValueNormalizer(llvm::RoundingMode::Dynamic)">; def fno_rounding_math : Flag<["-"], "fno-rounding-math">, Group, Flags<[CC1Option]>; -def ftrapping_math : Flag<["-"], "ftrapping-math">, Group, Flags<[CC1Option]>; -def fno_trapping_math : Flag<["-"], "fno-trapping-math">, Group, Flags<[CC1Option]>; +def ftrapping_math : Flag<["-"], "ftrapping-math">, Group; +def fno_trapping_math : Flag<["-"], "fno-trapping-math">, Group; def ffp_contract : Joined<["-"], "ffp-contract=">, Group, Flags<[CC1Option]>, HelpText<"Form fused FP ops 
(e.g. FMAs):" " fast (fuses across statements disregarding pragmas)" diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index be4fe7f8eddd0d..4a20936ddda12c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2833,9 +2833,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, if (TrappingMath) { // FP Exception Behavior is also set to strict assert(FPExceptionBehavior.equals("strict")); - CmdArgs.push_back("-ftrapping-math"); - } else if (TrappingMathPresent) - CmdArgs.push_back("-fno-trapping-math"); + } // The default is IEEE. if (DenormalFPMath != llvm::DenormalMode::getIEEE()) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 07906f4a36efd2..cc3b038a774627 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2684,14 +2684,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } - if (Args.hasArg(OPT_ftrapping_math)) { - Opts.setFPExceptionMode(LangOptions::FPE_Strict); - } - - if (Args.hasArg(OPT_fno_trapping_math)) { - Opts.setFPExceptionMode(LangOptions::FPE_Ignore); - } - LangOptions::FPExceptionModeKind FPEB = LangOptions::FPE_Ignore; if (Arg *A = Args.getLastArg(OPT_ffp_exception_behavior_EQ)) { StringRef Val = A->getValue(); diff --git a/clang/test/CodeGen/fpconstrained.c b/clang/test/CodeGen/fpconstrained.c index 0307ebbd357f52..643c0120eac5ad 100644 --- a/clang/test/CodeGen/fpconstrained.c +++ b/clang/test/CodeGen/fpconstrained.c @@ -1,11 +1,11 @@ -// RUN: %clang_cc1 -ftrapping-math -frounding-math -ffp-exception-behavior=strict -fexperimental-strict-floating-point -emit-llvm -o - %s | FileCheck %s -check-prefix=FPMODELSTRICT +// RUN: %clang_cc1 -frounding-math -ffp-exception-behavior=strict -fexperimental-strict-floating-point 
-emit-llvm -o - %s | FileCheck %s -check-prefix=FPMODELSTRICT // RUN: %clang_cc1 -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s -check-prefix=PRECISE // RUN: %clang_cc1 -ffast-math -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST // RUN: %clang_cc1 -ffast-math -emit-llvm -o - %s | FileCheck %s -check-prefix=FASTNOCONTRACT // RUN: %clang_cc1 -ffast-math -ffp-contract=fast -ffp-exception-behavior=ignore -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST // RUN: %clang_cc1 -ffast-math -ffp-contract=fast -ffp-exception-behavior=strict -fexperimental-strict-floating-point -emit-llvm -o - %s | FileCheck %s -check-prefix=EXCEPT // RUN: %clang_cc1 -ffast-math -ffp-contract=fast -ffp-exception-behavior=maytrap -fexperimental-strict-floating-point -emit-llvm -o - %s | FileCheck %s -check-prefix=MAYTRAP - + float f0, f1, f2; void foo() { diff --git a/clang/test/CodeGen/fpconstrained.cpp b/clang/test/CodeGen/fpconstrained.cpp index 305c3684486d91..884c359acab345 100644 --- a/clang/test/CodeGen/fpconstrained.cpp +++ b/clang/test/CodeGen/fpconstrained.cpp @@ -1,11 +1,11 @@ -// RUN: %clang_cc1 -x c++ -ftrapping-math -fexceptions -fcxx-exceptions -frounding-math -ffp-exception-behavior=strict -fexperimental-strict-floating-point -emit-llvm -o - %s | FileCheck %s -check-prefix=FPMODELSTRICT +// RUN: %clang_cc1 -x c++ -fexceptions -fcxx-exceptions -frounding-math -ffp-exception-behavior=strict -fexperimental-strict-floating-point -emit-llvm -o - %s | FileCheck %s -check-prefix=FPMODELSTRICT // RUN: %clang_cc1 -x c++ -ffp-contract=fast -fexceptions -fcxx-exceptions -emit-llvm -o - %s | FileCheck %s -check-prefix=PRECISE // RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST // RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -emit-llvm -o - %s | FileCheck %s -check-prefix=FASTNOCONTRACT // RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions 
-ffp-contract=fast -ffp-exception-behavior=ignore -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST // RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -ffp-contract=fast -ffp-exception-behavior=strict -fexperimental-strict-floating-point -emit-llvm -o - %s | FileCheck %s -check-prefix=EXCEPT // RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -ffp-contract=fast -ffp-exception-behavior=maytrap -fexperimental-strict-floating-point -emit-llvm -o - %s | FileCheck %s -check-prefix=MAYTRAP - + float f0, f1, f2; template diff --git a/clang/test/CodeGen/noexceptionsfpmath.c b/clang/test/CodeGen/noexceptionsfpmath.c index a22e285bb72a61..f468e98eb7a6da 100644 --- a/clang/test/CodeGen/noexceptionsfpmath.c +++ b/clang/test/CodeGen/noexceptionsfpmath.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -S -fno-trapping-math %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -S %s -emit-llvm -o - | FileCheck %s // CHECK-LABEL: main // CHECK: attributes #0 = {{.*}}"no-trapping-math"="true"{{.*}} diff --git a/clang/test/CodeGenCUDA/propagate-metadata.cu b/clang/test/CodeGenCUDA/propagate-metadata.cu index e90fe391150a58..c90b66f536c8cd 100644 --- a/clang/test/CodeGenCUDA/propagate-metadata.cu +++ b/clang/test/CodeGenCUDA/propagate-metadata.cu @@ -4,28 +4,24 @@ // // In particular, we check that ftz and unsafe-math are propagated into the // bitcode library as appropriate. -// -// In addition, we set -ftrapping-math on the bitcode library, but then set -// -fno-trapping-math on the main compilations, and ensure that the latter flag -// overrides the flag on the bitcode library. // Build the bitcode library. This is not built in CUDA mode, otherwise it // might have incompatible attributes. This mirrors how libdevice is built. 
-// RUN: %clang_cc1 -x c++ -fconvergent-functions -emit-llvm-bc -ftrapping-math -DLIB \ +// RUN: %clang_cc1 -x c++ -fconvergent-functions -emit-llvm-bc -DLIB \ // RUN: %s -o %t.bc -triple nvptx-unknown-unknown // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \ -// RUN: -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \ +// RUN: -fcuda-is-device -triple nvptx-unknown-unknown \ // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \ -// RUN: -fno-trapping-math -fdenormal-fp-math-f32=preserve-sign -o - \ +// RUN: -fdenormal-fp-math-f32=preserve-sign -o - \ // RUN: -fcuda-is-device -triple nvptx-unknown-unknown \ // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ \ // RUN: --check-prefix=NOFAST // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \ -// RUN: -fno-trapping-math -fdenormal-fp-math-f32=preserve-sign -o - \ +// RUN: -fdenormal-fp-math-f32=preserve-sign -o - \ // RUN: -fcuda-is-device -menable-unsafe-fp-math -triple nvptx-unknown-unknown \ // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST diff --git a/clang/test/Driver/fast-math.c b/clang/test/Driver/fast-math.c index da47de260dc902..d6c82add799cd6 100644 --- a/clang/test/Driver/fast-math.c +++ b/clang/test/Driver/fast-math.c @@ -292,10 +292,6 @@ // CHECK-NO-REASSOC-NO-UNSAFE-MATH: "-o" -// RUN: %clang -### -ftrapping-math -fno-trapping-math -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-NO-TRAPPING-MATH %s -// CHECK-NO-TRAPPING-MATH: "-fno-trapping-math" - // This isn't fast-math, but the option is handled in the same place as other FP params. // Last option wins, and strict behavior is assumed by default. 
diff --git a/clang/test/Driver/fp-model.c b/clang/test/Driver/fp-model.c index 8e61b4411cae4b..5fa9d110dd836b 100644 --- a/clang/test/Driver/fp-model.c +++ b/clang/test/Driver/fp-model.c @@ -80,7 +80,6 @@ // RUN: %clang -### -ftrapping-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-TRAP %s // CHECK-TRAP: "-cc1" -// CHECK-TRAP: "-ftrapping-math" // CHECK-TRAP: "-ffp-exception-behavior=strict" // RUN: %clang -### -nostdinc -ffp-model=fast -c %s 2>&1 \ @@ -106,16 +105,9 @@ // RUN: %clang -### -nostdinc -ffp-model=strict -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-STRICT %s // CHECK-FPM-STRICT: "-cc1" -// CHECK-FPM-STRICT: "-ftrapping-math" // CHECK-FPM-STRICT: "-frounding-math" // CHECK-FPM-STRICT: "-ffp-exception-behavior=strict" -// RUN: %clang -### -nostdinc -ftrapping-math -ffp-exception-behavior=ignore -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-TRAP-IGNORE %s -// CHECK-TRAP-IGNORE: "-cc1" -// CHECK-TRAP-IGNORE: "-fno-rounding-math" -// CHECK-TRAP-IGNORE: "-ffp-exception-behavior=ignore" - // RUN: %clang -### -nostdinc -ffp-exception-behavior=strict -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-STRICT %s diff --git a/clang/test/Parser/fp-floatcontrol-syntax.cpp b/clang/test/Parser/fp-floatcontrol-syntax.cpp index 5e7e8b1b03bc42..570821fe7468a0 100644 --- a/clang/test/Parser/fp-floatcontrol-syntax.cpp +++ b/clang/test/Parser/fp-floatcontrol-syntax.cpp @@ -19,9 +19,9 @@ void check_stack() { } #endif -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fdenormal-fp-math=preserve-sign,preserve-sign -ftrapping-math -fsyntax-only %s -DDEFAULT -verify +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fdenormal-fp-math=preserve-sign,preserve-sign -fsyntax-only %s -DDEFAULT -verify // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only %s -ffp-contract=fast -DPRECISE -verify -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only %s -ftrapping-math -ffp-contract=off -frounding-math -ffp-exception-behavior=strict -DSTRICT -verify 
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only %s -ffp-contract=off -frounding-math -ffp-exception-behavior=strict -DSTRICT -verify // RUN: %clang_cc1 -triple x86_64-linux-gnu -menable-no-infs -menable-no-nans -menable-unsafe-fp-math -fno-signed-zeros -mreassociate -freciprocal-math -ffp-contract=fast -ffast-math -ffinite-math-only -fsyntax-only %s -DFAST -verify double a = 0.0; double b = 1.0; From bd30a796fc4b51750248ccba29cd6fb1f61859f5 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Mon, 11 Jan 2021 13:58:05 +0100 Subject: [PATCH 03/86] [mlir] use built-in vector types instead of LLVM dialect types when possible Continue the convergence between LLVM dialect and built-in types by using the built-in vector type whenever possible, that is for fixed vectors of built-in integers and built-in floats. LLVM dialect vector type is still in use for pointers, less frequent floating point types that do not have a built-in equivalent, and scalable vectors. However, the top-level `LLVMVectorType` class has been removed in favor of free functions capable of inspecting both built-in and LLVM dialect vector types: `LLVM::getVectorElementType`, `LLVM::getNumVectorElements` and `LLVM::getFixedVectorType`. Additional work is necessary to design and implement the extensions to built-in types so as to remove the `LLVMFixedVectorType` entirely. Note that the default output format for the built-in vectors does not have whitespace around the `x` separator, e.g., `vector<4xf32>` as opposed to the LLVM dialect vector type format that does, e.g., `!llvm.vec<4 x fp128>`. This required changing the FileCheck patterns in several tests. 
Reviewed By: mehdi_amini, silvas Differential Revision: https://reviews.llvm.org/D94405 --- mlir/docs/ConversionToLLVMDialect.md | 4 +- mlir/docs/Dialects/LLVM.md | 4 +- mlir/docs/SPIRVToLLVMDialectConversion.md | 18 +- .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 8 +- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 10 +- mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h | 49 +- .../LLVMIR/CPU/test-vector-reductions-fp.mlir | 32 +- .../CPU/test-vector-reductions-int.mlir | 30 +- .../Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp | 4 +- .../StandardToLLVM/StandardToLLVM.cpp | 19 +- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 7 +- .../VectorToROCDL/VectorToROCDL.cpp | 16 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 54 +- mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp | 14 +- mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 143 +++- mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 2 +- mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp | 4 +- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 14 +- mlir/lib/Target/LLVMIR/TypeTranslation.cpp | 15 +- .../ArmNeonToLLVM/convert-to-llvm.mlir | 6 +- .../SPIRVToLLVM/arithmetic-ops-to-llvm.mlir | 26 +- .../SPIRVToLLVM/bitwise-ops-to-llvm.mlir | 94 +-- .../SPIRVToLLVM/cast-ops-to-llvm.mlir | 28 +- .../SPIRVToLLVM/comparison-ops-to-llvm.mlir | 44 +- .../SPIRVToLLVM/constant-op-to-llvm.mlir | 12 +- .../SPIRVToLLVM/func-ops-to-llvm.mlir | 8 +- .../SPIRVToLLVM/glsl-ops-to-llvm.mlir | 24 +- .../SPIRVToLLVM/logical-ops-to-llvm.mlir | 12 +- .../SPIRVToLLVM/memory-ops-to-llvm.mlir | 8 +- .../SPIRVToLLVM/misc-ops-to-llvm.mlir | 10 +- .../SPIRVToLLVM/shift-ops-to-llvm.mlir | 36 +- .../SPIRVToLLVM/spirv-types-to-llvm.mlir | 6 +- .../StandardToLLVM/convert-to-llvmir.mlir | 140 ++-- .../StandardToLLVM/standard-to-llvm.mlir | 20 +- .../VectorToLLVM/vector-mask-to-llvm.mlir | 40 +- .../vector-reduction-to-llvm.mlir | 16 +- .../VectorToLLVM/vector-to-llvm.mlir | 726 +++++++++--------- .../VectorToROCDL/vector-to-rocdl.mlir | 8 +- mlir/test/Dialect/LLVMIR/dialect-cast.mlir | 
17 +- mlir/test/Dialect/LLVMIR/invalid.mlir | 62 +- mlir/test/Dialect/LLVMIR/nvvm.mlir | 8 +- mlir/test/Dialect/LLVMIR/rocdl.mlir | 164 ++-- mlir/test/Dialect/LLVMIR/roundtrip.mlir | 36 +- mlir/test/Dialect/LLVMIR/types-invalid.mlir | 12 +- mlir/test/Dialect/LLVMIR/types.mlir | 8 +- mlir/test/Target/arm-neon.mlir | 22 +- mlir/test/Target/arm-sve.mlir | 56 +- mlir/test/Target/avx512.mlir | 20 +- mlir/test/Target/import.ll | 6 +- mlir/test/Target/llvmir-intrinsics.mlir | 172 ++--- mlir/test/Target/llvmir-types.mlir | 10 +- mlir/test/Target/llvmir.mlir | 84 +- mlir/test/Target/nvvmir.mlir | 6 +- mlir/test/Target/rocdl.mlir | 110 +-- 54 files changed, 1284 insertions(+), 1220 deletions(-) diff --git a/mlir/docs/ConversionToLLVMDialect.md b/mlir/docs/ConversionToLLVMDialect.md index d0ea746853b17a..d36b4498272ce6 100644 --- a/mlir/docs/ConversionToLLVMDialect.md +++ b/mlir/docs/ConversionToLLVMDialect.md @@ -48,8 +48,8 @@ size with element type converted using these conversion rules. In the n-dimensional case, MLIR vectors are converted to (n-1)-dimensional array types of one-dimensional vectors. -For example, `vector<4 x f32>` converts to `!llvm.vec<4 x f32>` and `vector<4 x -8 x 16 x f32>` converts to `!llvm.array<4 x array<8 x vec<16 x f32>>>`. +For example, `vector<4xf32>` converts to `vector<4xf32>` and `vector<4 x 8 x 16 +x f32>` converts to `!llvm.array<4 x array<8 x vec<16 x f32>>>`. ### Ranked Memref Types diff --git a/mlir/docs/Dialects/LLVM.md b/mlir/docs/Dialects/LLVM.md index 3d91588b2d21d6..d232ffab148c4a 100644 --- a/mlir/docs/Dialects/LLVM.md +++ b/mlir/docs/Dialects/LLVM.md @@ -127,7 +127,7 @@ Examples: %3 = llvm.mlir.constant(42 : i32) : i32 // Splat dense vector constant. 
-%3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : !llvm.vec<4 x f32> +%3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : vector<4xf32> ``` Note that constants use built-in types within the initializer definition: MLIR @@ -274,7 +274,7 @@ Vectors cannot be nested and only 1D vectors are supported. Scalable vectors are still considered 1D. Their syntax is as follows: ``` - llvm-vec-type ::= `!llvm.vec<` (`?` `x`)? integer-literal `x` llvm-type `>` + llvm-vec-type ::= `vector<` (`?` `x`)? integer-literal `x` llvm-type `>` ``` Internally, fixed vector types are represented as `LLVMFixedVectorType` and diff --git a/mlir/docs/SPIRVToLLVMDialectConversion.md b/mlir/docs/SPIRVToLLVMDialectConversion.md index 30188f692b4332..de291ac22c4363 100644 --- a/mlir/docs/SPIRVToLLVMDialectConversion.md +++ b/mlir/docs/SPIRVToLLVMDialectConversion.md @@ -34,9 +34,9 @@ SPIR-V Dialect | LLVM Dialect ### Vector types -SPIR-V Dialect | LLVM Dialect -:----------------------------------: | :----------------------------------: -`vector< x >` | `!llvm.vec< x >` +SPIR-V Dialect | LLVM Dialect +:-------------------------------: | :-------------------------------: +`vector< x >` | `vector< x >` ### Pointer types @@ -188,11 +188,11 @@ to note: ```mlir // Broadcasting offset - %offset0 = llvm.mlir.undef : !llvm.vec<2 x i8> + %offset0 = llvm.mlir.undef : vector<2xi8> %zero = llvm.mlir.constant(0 : i32) : i32 - %offset1 = llvm.insertelement %offset, %offset0[%zero : i32] : !llvm.vec<2 x i8> + %offset1 = llvm.insertelement %offset, %offset0[%zero : i32] : vector<2xi8> %one = llvm.mlir.constant(1 : i32) : i32 - %vec_offset = llvm.insertelement %offset, %offset1[%one : i32] : !llvm.vec<2 x i8> + %vec_offset = llvm.insertelement %offset, %offset1[%one : i32] : vector<2xi8> // Broadcasting count // ... 
@@ -205,7 +205,7 @@ to note: ```mlir // Zero extending offset after broadcasting - %res_offset = llvm.zext %vec_offset: !llvm.vec<2 x i8> to !llvm.vec<2 x i32> + %res_offset = llvm.zext %vec_offset: vector<2xi8> to vector<2xi32> ``` Also, note that if the bitwidth of `offset` or `count` is greater than the @@ -534,7 +534,7 @@ Also, at the moment initialization is only possible via `spv.constant`. ```mlir // Conversion of VariableOp without initialization %size = llvm.mlir.constant(1 : i32) : i32 -%res = spv.Variable : !spv.ptr, Function> => %res = llvm.alloca %size x !llvm.vec<3 x f32> : (i32) -> !llvm.ptr> +%res = spv.Variable : !spv.ptr, Function> => %res = llvm.alloca %size x vector<3xf32> : (i32) -> !llvm.ptr> // Conversion of VariableOp with initialization %c = llvm.mlir.constant(0 : i64) : i64 @@ -610,7 +610,7 @@ cover all possible corner cases. // %0 = llvm.mlir.constant(0 : i8) : i8 %0 = spv.constant 0 : i8 -// %1 = llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : !llvm.vec<3 x i32> +// %1 = llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : vector<3xi32> %1 = spv.constant dense<[2, 3, 4]> : vector<3xui32> ``` diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 0d3f5322531d1b..0ef223c4b0232c 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -126,8 +126,8 @@ def LLVM_AnyNonAggregate : Type, "LLVM non-aggregate type">; // Type constraint accepting any LLVM vector type. -def LLVM_AnyVector : Type()">, - "LLVM vector type">; +def LLVM_AnyVector : Type, + "LLVM dialect-compatible vector type">; // Type constraint accepting an LLVM vector type with an additional constraint // on the vector element type. 
@@ -135,9 +135,9 @@ class LLVM_VectorOf : Type< And<[LLVM_AnyVector.predicate, SubstLeaves< "$_self", - "$_self.cast<::mlir::LLVM::LLVMVectorType>().getElementType()", + "::mlir::LLVM::getVectorElementType($_self)", element.predicate>]>, - "LLVM vector of " # element.summary>; + "LLVM dialect-compatible vector of " # element.summary>; // Type constraint accepting a constrained type, or a vector of such types. class LLVM_ScalarOrVectorOf : diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index ce91dffe861c7b..cb2eede3040e49 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -555,10 +555,10 @@ def LLVM_ShuffleVectorOp : LLVM_Op<"shufflevector", [NoSideEffect]> { OpBuilderDAG<(ins "Value":$v1, "Value":$v2, "ArrayAttr":$mask, CArg<"ArrayRef", "{}">:$attrs)>]; let verifier = [{ - auto wrappedVectorType1 = v1().getType().cast(); - auto wrappedVectorType2 = v2().getType().cast(); - if (wrappedVectorType1.getElementType() != - wrappedVectorType2.getElementType()) + auto type1 = v1().getType(); + auto type2 = v2().getType(); + if (::mlir::LLVM::getVectorElementType(type1) != + ::mlir::LLVM::getVectorElementType(type2)) return emitOpError("expected matching LLVM IR Dialect element types"); return success(); }]; @@ -1111,7 +1111,7 @@ def LLVM_ConstantOp %2 = llvm.mlir.constant(42.0 : f32) : f32 // Splat dense vector constant. 
- %3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : !llvm.vec<4 x f32> + %3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : vector<4xf32> ``` }]; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index 3cd1733b8d52b6..f21dd1de995efa 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -317,12 +317,11 @@ class LLVMVectorType : public Type { /// LLVM dialect fixed vector type, represents a sequence of elements of known /// length that can be processed as one. class LLVMFixedVectorType - : public Type::TypeBase { public: /// Inherit base constructor. using Base::Base; - using LLVMVectorType::verifyConstructionInvariants; /// Gets or creates a fixed vector type containing `numElements` of /// `elementType` in the same context as `elementType`. @@ -330,8 +329,21 @@ class LLVMFixedVectorType static LLVMFixedVectorType getChecked(Location loc, Type elementType, unsigned numElements); + /// Checks if the given type can be used in a vector type. This type supports + /// only a subset of LLVM dialect types that don't have a built-in + /// counter-part, e.g., pointers. + static bool isValidElementType(Type type); + + /// Returns the element type of the vector. + Type getElementType(); + /// Returns the number of elements in the fixed vector. unsigned getNumElements(); + + /// Verifies that the type about to be constructed is well-formed. + static LogicalResult verifyConstructionInvariants(Location loc, + Type elementType, + unsigned numElements); }; //===----------------------------------------------------------------------===// @@ -342,12 +354,11 @@ class LLVMFixedVectorType /// unknown length that is known to be divisible by some constant. These /// elements can be processed as one in SIMD context. class LLVMScalableVectorType - : public Type::TypeBase { public: /// Inherit base constructor. 
using Base::Base; - using LLVMVectorType::verifyConstructionInvariants; /// Gets or creates a scalable vector type containing a non-zero multiple of /// `minNumElements` of `elementType` in the same context as `elementType`. @@ -355,10 +366,21 @@ class LLVMScalableVectorType static LLVMScalableVectorType getChecked(Location loc, Type elementType, unsigned minNumElements); + /// Checks if the given type can be used in a vector type. + static bool isValidElementType(Type type); + + /// Returns the element type of the vector. + Type getElementType(); + /// Returns the scaling factor of the number of elements in the vector. The /// vector contains at least the resulting number of elements, or any non-zero /// multiple of this number. unsigned getMinNumElements(); + + /// Verifies that the type about to be constructed is well-formed. + static LogicalResult verifyConstructionInvariants(Location loc, + Type elementType, + unsigned minNumElements); }; //===----------------------------------------------------------------------===// @@ -384,9 +406,26 @@ bool isCompatibleType(Type type); /// the LLVM dialect. bool isCompatibleFloatingPointType(Type type); +/// Returns `true` if the given type is a vector type compatible with the LLVM +/// dialect. Compatible types include 1D built-in vector types of built-in +/// integers and floating-point values, LLVM dialect fixed vector types of LLVM +/// dialect pointers and LLVM dialect scalable vector types. +bool isCompatibleVectorType(Type type); + +/// Returns the element type of any vector type compatible with the LLVM +/// dialect. +Type getVectorElementType(Type type); + +/// Returns the element count of any LLVM-compatible vector type. +llvm::ElementCount getVectorNumElements(Type type); + +/// Creates an LLVM dialect-compatible type with the given element type and +/// length. 
+Type getFixedVectorType(Type elementType, unsigned numElements); + /// Returns the size of the given primitive LLVM dialect-compatible type /// (including vectors) in bits, for example, the size of i16 is 16 and -/// the size of !llvm.vec<4 x i16> is 64. Returns 0 for non-primitive +/// the size of vector<4xi16> is 64. Returns 0 for non-primitive /// (aggregates such as struct) or types that don't have a size (such as void). llvm::TypeSize getPrimitiveTypeSizeInBits(Type type); diff --git a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir index 1d076e64ba2b8d..9d390fe950e131 100644 --- a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir +++ b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir @@ -12,74 +12,74 @@ module { %1 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %2 = llvm.mlir.constant(3.000000e+00 : f32) : f32 %3 = llvm.mlir.constant(4.000000e+00 : f32) : f32 - %4 = llvm.mlir.undef : !llvm.vec<4 x f32> + %4 = llvm.mlir.undef : vector<4xf32> %5 = llvm.mlir.constant(0 : index) : i64 - %6 = llvm.insertelement %0, %4[%5 : i64] : !llvm.vec<4 x f32> + %6 = llvm.insertelement %0, %4[%5 : i64] : vector<4xf32> %7 = llvm.shufflevector %6, %4 [0 : i32, 0 : i32, 0 : i32, 0 : i32] - : !llvm.vec<4 x f32>, !llvm.vec<4 x f32> + : vector<4xf32>, vector<4xf32> %8 = llvm.mlir.constant(1 : i64) : i64 - %9 = llvm.insertelement %1, %7[%8 : i64] : !llvm.vec<4 x f32> + %9 = llvm.insertelement %1, %7[%8 : i64] : vector<4xf32> %10 = llvm.mlir.constant(2 : i64) : i64 - %11 = llvm.insertelement %2, %9[%10 : i64] : !llvm.vec<4 x f32> + %11 = llvm.insertelement %2, %9[%10 : i64] : vector<4xf32> %12 = llvm.mlir.constant(3 : i64) : i64 - %v = llvm.insertelement %3, %11[%12 : i64] : !llvm.vec<4 x f32> + %v = llvm.insertelement %3, %11[%12 : i64] : vector<4xf32> %max = "llvm.intr.vector.reduce.fmax"(%v) - : (!llvm.vec<4 x f32>) -> f32 + : 
(vector<4xf32>) -> f32 llvm.call @printF32(%max) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 4 %min = "llvm.intr.vector.reduce.fmin"(%v) - : (!llvm.vec<4 x f32>) -> f32 + : (vector<4xf32>) -> f32 llvm.call @printF32(%min) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 1 %add1 = "llvm.intr.vector.reduce.fadd"(%0, %v) - : (f32, !llvm.vec<4 x f32>) -> f32 + : (f32, vector<4xf32>) -> f32 llvm.call @printF32(%add1) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 11 %add1r = "llvm.intr.vector.reduce.fadd"(%0, %v) - {reassoc = true} : (f32, !llvm.vec<4 x f32>) -> f32 + {reassoc = true} : (f32, vector<4xf32>) -> f32 llvm.call @printF32(%add1r) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 11 %add2 = "llvm.intr.vector.reduce.fadd"(%1, %v) - : (f32, !llvm.vec<4 x f32>) -> f32 + : (f32, vector<4xf32>) -> f32 llvm.call @printF32(%add2) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 12 %add2r = "llvm.intr.vector.reduce.fadd"(%1, %v) - {reassoc = true} : (f32, !llvm.vec<4 x f32>) -> f32 + {reassoc = true} : (f32, vector<4xf32>) -> f32 llvm.call @printF32(%add2r) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 12 %mul1 = "llvm.intr.vector.reduce.fmul"(%0, %v) - : (f32, !llvm.vec<4 x f32>) -> f32 + : (f32, vector<4xf32>) -> f32 llvm.call @printF32(%mul1) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 24 %mul1r = "llvm.intr.vector.reduce.fmul"(%0, %v) - {reassoc = true} : (f32, !llvm.vec<4 x f32>) -> f32 + {reassoc = true} : (f32, vector<4xf32>) -> f32 llvm.call @printF32(%mul1r) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 24 %mul2 = "llvm.intr.vector.reduce.fmul"(%1, %v) - : (f32, !llvm.vec<4 x f32>) -> f32 + : (f32, vector<4xf32>) -> f32 llvm.call @printF32(%mul2) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 48 %mul2r = "llvm.intr.vector.reduce.fmul"(%1, %v) - {reassoc = true} : (f32, !llvm.vec<4 x f32>) -> f32 + {reassoc = true} : (f32, 
vector<4xf32>) -> f32 llvm.call @printF32(%mul2r) : (f32) -> () llvm.call @printNewline() : () -> () // CHECK: 48 diff --git a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir index 181e2e3ce0fc30..74e8667bf2ecaf 100644 --- a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir +++ b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir @@ -12,68 +12,68 @@ module { %1 = llvm.mlir.constant(2 : i64) : i64 %2 = llvm.mlir.constant(3 : i64) : i64 %3 = llvm.mlir.constant(4 : i64) : i64 - %4 = llvm.mlir.undef : !llvm.vec<4 x i64> + %4 = llvm.mlir.undef : vector<4xi64> %5 = llvm.mlir.constant(0 : index) : i64 - %6 = llvm.insertelement %0, %4[%5 : i64] : !llvm.vec<4 x i64> + %6 = llvm.insertelement %0, %4[%5 : i64] : vector<4xi64> %7 = llvm.shufflevector %6, %4 [0 : i64, 0 : i64, 0 : i64, 0 : i64] - : !llvm.vec<4 x i64>, !llvm.vec<4 x i64> + : vector<4xi64>, vector<4xi64> %8 = llvm.mlir.constant(1 : i64) : i64 - %9 = llvm.insertelement %1, %7[%8 : i64] : !llvm.vec<4 x i64> + %9 = llvm.insertelement %1, %7[%8 : i64] : vector<4xi64> %10 = llvm.mlir.constant(2 : i64) : i64 - %11 = llvm.insertelement %2, %9[%10 : i64] : !llvm.vec<4 x i64> + %11 = llvm.insertelement %2, %9[%10 : i64] : vector<4xi64> %12 = llvm.mlir.constant(3 : i64) : i64 - %v = llvm.insertelement %3, %11[%12 : i64] : !llvm.vec<4 x i64> + %v = llvm.insertelement %3, %11[%12 : i64] : vector<4xi64> %add = "llvm.intr.vector.reduce.add"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call @printI64(%add) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 10 %and = "llvm.intr.vector.reduce.and"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call @printI64(%and) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 0 %mul = "llvm.intr.vector.reduce.mul"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call 
@printI64(%mul) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 24 %or = "llvm.intr.vector.reduce.or"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call @printI64(%or) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 7 %smax = "llvm.intr.vector.reduce.smax"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call @printI64(%smax) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 4 %smin = "llvm.intr.vector.reduce.smin"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call @printI64(%smin) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 1 %umax = "llvm.intr.vector.reduce.umax"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call @printI64(%umax) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 4 %umin = "llvm.intr.vector.reduce.umin"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call @printI64(%umin) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 1 %xor = "llvm.intr.vector.reduce.xor"(%v) - : (!llvm.vec<4 x i64>) -> i64 + : (vector<4xi64>) -> i64 llvm.call @printI64(%xor) : (i64) -> () llvm.call @printNewline() : () -> () // CHECK: 4 diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp index f60ba96e3a2057..0c868cb549b7e2 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp @@ -66,8 +66,8 @@ static unsigned getBitWidth(Type type) { /// Returns the bit width of LLVMType integer or vector. static unsigned getLLVMTypeBitWidth(Type type) { - auto vectorType = type.dyn_cast(); - return (vectorType ? vectorType.getElementType() : type) + return (LLVM::isCompatibleVectorType(type) ? 
LLVM::getVectorElementType(type) + : type) .cast() .getWidth(); } diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 512273347e4b00..c2dea0c99540a5 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -390,16 +390,16 @@ Type LLVMTypeConverter::convertMemRefToBarePtr(BaseMemRefType type) { return LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); } -// Convert an n-D vector type to an LLVM vector type via (n-1)-D array type when -// n > 1. -// For example, `vector<4 x f32>` converts to `!llvm.type<"<4 x f32>">` and -// `vector<4 x 8 x 16 f32>` converts to `!llvm."[4 x [8 x <16 x f32>]]">`. +/// Convert an n-D vector type to an LLVM vector type via (n-1)-D array type +/// when n > 1. For example, `vector<4 x f32>` remains as is while, +/// `vector<4x8x16xf32>` converts to `!llvm.array<4xarray<8 x vector<16xf32>>>`. Type LLVMTypeConverter::convertVectorType(VectorType type) { auto elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - Type vectorType = - LLVM::LLVMFixedVectorType::get(elementType, type.getShape().back()); + Type vectorType = VectorType::get(type.getShape().back(), elementType); + assert(LLVM::isCompatibleVectorType(vectorType) && + "expected vector type compatible with the LLVM dialect"); auto shape = type.getShape(); for (int i = shape.size() - 2; i >= 0; --i) vectorType = LLVM::LLVMArrayType::get(vectorType, shape[i]); @@ -1500,7 +1500,7 @@ static NDVectorTypeInfo extractNDVectorTypeInfo(VectorType vectorType, llvmTy.cast().getNumElements()); llvmTy = llvmTy.cast().getElementType(); } - if (!llvmTy.isa()) + if (!LLVM::isCompatibleVectorType(llvmTy)) return info; info.llvmVectorTy = llvmTy; return info; @@ -2484,7 +2484,7 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { if (!operandType.isa()) { LLVM::ConstantOp one; - if (operandType.isa()) 
{ + if (LLVM::isCompatibleVectorType(operandType)) { one = rewriter.create( loc, operandType, SplatElementsAttr::get(resultType.cast(), floatOne)); @@ -2505,8 +2505,7 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { [&](Type llvmVectorTy, ValueRange operands) { auto splatAttr = SplatElementsAttr::get( mlir::VectorType::get( - {llvmVectorTy.cast() - .getNumElements()}, + {LLVM::getVectorNumElements(llvmVectorTy).getFixedValue()}, floatType), floatOne); auto one = diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 5dd0b028767abc..9e4c8bd127fac0 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -182,7 +182,7 @@ static LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter, if (failed(getBase(rewriter, loc, memref, memRefType, base))) return failure(); auto pType = MemRefDescriptor(memref).getElementPtrType(); - auto ptrsType = LLVM::LLVMFixedVectorType::get(pType, vType.getDimSize(0)); + auto ptrsType = LLVM::getFixedVectorType(pType, vType.getDimSize(0)); ptrs = rewriter.create(loc, ptrsType, base, indices); return success(); } @@ -192,8 +192,7 @@ static LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter, // used when source/dst memrefs are not on address space 0. static Value castDataPtr(ConversionPatternRewriter &rewriter, Location loc, Value ptr, MemRefType memRefType, Type vt) { - auto pType = - LLVM::LLVMPointerType::get(vt.template cast()); + auto pType = LLVM::LLVMPointerType::get(vt); if (memRefType.getMemorySpace() == 0) return rewriter.create(loc, pType, ptr); return rewriter.create(loc, pType, ptr); @@ -1226,7 +1225,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // // TODO: when the leaf transfer rank is k > 1, we need the last `k` // dimensions here. 
- unsigned vecWidth = vtp.getNumElements(); + unsigned vecWidth = LLVM::getVectorNumElements(vtp).getFixedValue(); unsigned lastIndex = llvm::size(xferOp.indices()) - 1; Value off = xferOp.indices()[lastIndex]; Value dim = rewriter.create(loc, xferOp.source(), lastIndex); diff --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp index d27f097a3baa3a..005e7b30ea7c61 100644 --- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp +++ b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp @@ -78,9 +78,8 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { auto toLLVMTy = [&](Type t) { return this->getTypeConverter()->convertType(t); }; - auto vecTy = toLLVMTy(xferOp.getVectorType()) - .template cast(); - unsigned vecWidth = vecTy.getNumElements(); + auto vecTy = toLLVMTy(xferOp.getVectorType()); + unsigned vecWidth = LLVM::getVectorNumElements(vecTy).getFixedValue(); Location loc = xferOp->getLoc(); // The backend result vector scalarization have trouble scalarize @@ -120,18 +119,13 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // to it. 
Type i64Ty = rewriter.getIntegerType(64); Value i64x2Ty = rewriter.create( - loc, - LLVM::LLVMFixedVectorType::get(toLLVMTy(i64Ty).template cast(), - 2), - constConfig); + loc, LLVM::getFixedVectorType(toLLVMTy(i64Ty), 2), constConfig); Value dataPtrAsI64 = rewriter.create( loc, toLLVMTy(i64Ty).template cast(), dataPtr); Value zero = this->createIndexConstant(rewriter, loc, 0); Value dwordConfig = rewriter.create( - loc, - LLVM::LLVMFixedVectorType::get(toLLVMTy(i64Ty).template cast(), - 2), - i64x2Ty, dataPtrAsI64, zero); + loc, LLVM::getFixedVectorType(toLLVMTy(i64Ty), 2), i64x2Ty, + dataPtrAsI64, zero); dwordConfig = rewriter.create(loc, toLLVMTy(i32Vecx4), dwordConfig); diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 3c9329ff0eb5a3..b895c904d623d5 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -150,9 +150,9 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) { if (!isCompatibleType(type)) return parser.emitError(trailingTypeLoc, "expected LLVM dialect-compatible type"); - if (auto vecArgType = type.dyn_cast()) - resultType = - LLVMFixedVectorType::get(resultType, vecArgType.getNumElements()); + if (LLVM::isCompatibleVectorType(type)) + resultType = LLVM::getFixedVectorType( + resultType, LLVM::getVectorNumElements(type).getFixedValue()); assert(!type.isa() && "unhandled scalable vector"); @@ -913,8 +913,8 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { void LLVM::ExtractElementOp::build(OpBuilder &b, OperationState &result, Value vector, Value position, ArrayRef attrs) { - auto vectorType = vector.getType().cast(); - auto llvmType = vectorType.getElementType(); + auto vectorType = vector.getType(); + auto llvmType = LLVM::getVectorElementType(vectorType); build(b, result, llvmType, vector, position); result.addAttributes(attrs); } @@ -941,11 +941,10 @@ static ParseResult 
parseExtractElementOp(OpAsmParser &parser, parser.resolveOperand(vector, type, result.operands) || parser.resolveOperand(position, positionType, result.operands)) return failure(); - auto vectorType = type.dyn_cast(); - if (!vectorType) + if (!LLVM::isCompatibleVectorType(type)) return parser.emitError( - loc, "expected LLVM IR dialect vector type for operand #1"); - result.addTypes(vectorType.getElementType()); + loc, "expected LLVM dialect-compatible vector type for operand #1"); + result.addTypes(LLVM::getVectorElementType(type)); return success(); } @@ -1057,11 +1056,10 @@ static ParseResult parseInsertElementOp(OpAsmParser &parser, parser.parseColonType(vectorType)) return failure(); - auto llvmVectorType = vectorType.dyn_cast(); - if (!llvmVectorType) + if (!LLVM::isCompatibleVectorType(vectorType)) return parser.emitError( - loc, "expected LLVM IR dialect vector type for operand #1"); - Type valueType = llvmVectorType.getElementType(); + loc, "expected LLVM dialect-compatible vector type for operand #1"); + Type valueType = LLVM::getVectorElementType(vectorType); if (!valueType) return failure(); @@ -1278,21 +1276,8 @@ static LogicalResult verifyCast(DialectCastOp op, Type llvmType, Type type, // Vectors are compatible if they are 1D non-scalable, and their element types // are compatible. 
- if (auto vectorType = type.dyn_cast()) { - if (vectorType.getRank() != 1) - return op->emitOpError("only 1-d vector is allowed"); - - auto llvmVector = llvmType.dyn_cast(); - if (!llvmVector) - return op->emitOpError("only fixed-sized vector is allowed"); - - if (vectorType.getDimSize(0) != llvmVector.getNumElements()) - return op->emitOpError( - "invalid cast between vectors with mismatching sizes"); - - return verifyCast(op, llvmVector.getElementType(), - vectorType.getElementType(), /*isElement=*/true); - } + if (auto vectorType = type.dyn_cast()) + return op.emitOpError("vector types should not be casted"); if (auto memrefType = type.dyn_cast()) { // Bare pointer convention: statically-shaped memref is compatible with an @@ -1543,9 +1528,9 @@ static LogicalResult verify(GlobalOp op) { void LLVM::ShuffleVectorOp::build(OpBuilder &b, OperationState &result, Value v1, Value v2, ArrayAttr mask, ArrayRef attrs) { - auto containerType = v1.getType().cast(); - auto vType = - LLVMFixedVectorType::get(containerType.getElementType(), mask.size()); + auto containerType = v1.getType(); + auto vType = LLVM::getFixedVectorType( + LLVM::getVectorElementType(containerType), mask.size()); build(b, result, vType, v1, v2, mask); result.addAttributes(attrs); } @@ -1575,12 +1560,11 @@ static ParseResult parseShuffleVectorOp(OpAsmParser &parser, parser.resolveOperand(v1, typeV1, result.operands) || parser.resolveOperand(v2, typeV2, result.operands)) return failure(); - auto containerType = typeV1.dyn_cast(); - if (!containerType) + if (!LLVM::isCompatibleVectorType(typeV1)) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - auto vType = - LLVMFixedVectorType::get(containerType.getElementType(), maskAttr.size()); + auto vType = LLVM::getFixedVectorType(LLVM::getVectorElementType(typeV1), + maskAttr.size()); result.addTypes(vType); return success(); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp 
b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp index 18a4262bcaf835..3ff69006da42f0 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp @@ -24,8 +24,7 @@ using namespace mlir::LLVM; /// internal functions to avoid getting a verbose `!llvm` prefix. Otherwise /// prints it as usual. static void dispatchPrint(DialectAsmPrinter &printer, Type type) { - if (isCompatibleType(type) && !type.isa() && - !type.isa()) + if (isCompatibleType(type) && !type.isa()) return mlir::LLVM::detail::printType(type, printer); printer.printType(type); } @@ -43,7 +42,8 @@ static StringRef getTypeKeyword(Type type) { .Case([&](Type) { return "metadata"; }) .Case([&](Type) { return "func"; }) .Case([&](Type) { return "ptr"; }) - .Case([&](Type) { return "vec"; }) + .Case( + [&](Type) { return "vec"; }) .Case([&](Type) { return "array"; }) .Case([&](Type) { return "struct"; }) .Default([](Type) -> StringRef { @@ -236,7 +236,7 @@ static LLVMPointerType parsePointerType(DialectAsmParser &parser) { /// Parses an LLVM dialect vector type. /// llvm-type ::= `vec<` `? x`? integer `x` llvm-type `>` /// Supports both fixed and scalable vectors. -static LLVMVectorType parseVectorType(DialectAsmParser &parser) { +static Type parseVectorType(DialectAsmParser &parser) { SmallVector dims; llvm::SMLoc dimPos; Type elementType; @@ -244,7 +244,7 @@ static LLVMVectorType parseVectorType(DialectAsmParser &parser) { if (parser.parseLess() || parser.getCurrentLocation(&dimPos) || parser.parseDimensionList(dims, /*allowDynamic=*/true) || dispatchParse(parser, elementType) || parser.parseGreater()) - return LLVMVectorType(); + return Type(); // We parsed a generic dimension list, but vectors only support two forms: // - single non-dynamic entry in the list (fixed vector); @@ -255,12 +255,14 @@ static LLVMVectorType parseVectorType(DialectAsmParser &parser) { (dims.size() == 2 && dims[1] == -1)) { parser.emitError(dimPos) << "expected '? 
x x ' or ' x '"; - return LLVMVectorType(); + return Type(); } bool isScalable = dims.size() == 2; if (isScalable) return LLVMScalableVectorType::getChecked(loc, elementType, dims[1]); + if (elementType.isSignlessIntOrFloat()) + return VectorType::getChecked(loc, dims, elementType); return LLVMFixedVectorType::getChecked(loc, elementType, dims[0]); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index ce6f052eb8712a..ace7194011ac25 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -236,38 +236,15 @@ LogicalResult LLVMStructType::verifyConstructionInvariants(Location loc, // Vector types. //===----------------------------------------------------------------------===// -bool LLVMVectorType::isValidElementType(Type type) { - if (auto intType = type.dyn_cast()) - return intType.isSignless(); - return type.isa() || - mlir::LLVM::isCompatibleFloatingPointType(type); -} - -/// Support type casting functionality. -bool LLVMVectorType::classof(Type type) { - return type.isa(); -} - -Type LLVMVectorType::getElementType() { - // Both derived classes share the implementation type. - return static_cast(impl)->elementType; -} - -llvm::ElementCount LLVMVectorType::getElementCount() { - // Both derived classes share the implementation type. - return llvm::ElementCount::get( - static_cast(impl)->numElements, - isa()); -} - /// Verifies that the type about to be constructed is well-formed. 
-LogicalResult -LLVMVectorType::verifyConstructionInvariants(Location loc, Type elementType, - unsigned numElements) { +template +static LogicalResult verifyVectorConstructionInvariants(Location loc, + Type elementType, + unsigned numElements) { if (numElements == 0) return emitError(loc, "the number of vector elements must be positive"); - if (!isValidElementType(elementType)) + if (!VecTy::isValidElementType(elementType)) return emitError(loc, "invalid vector element type"); return success(); @@ -286,10 +263,29 @@ LLVMFixedVectorType LLVMFixedVectorType::getChecked(Location loc, return Base::getChecked(loc, elementType, numElements); } +Type LLVMFixedVectorType::getElementType() { + return static_cast(impl)->elementType; +} + unsigned LLVMFixedVectorType::getNumElements() { return getImpl()->numElements; } +bool LLVMFixedVectorType::isValidElementType(Type type) { + return type + .isa(); +} + +LogicalResult LLVMFixedVectorType::verifyConstructionInvariants( + Location loc, Type elementType, unsigned numElements) { + return verifyVectorConstructionInvariants( + loc, elementType, numElements); +} + +//===----------------------------------------------------------------------===// +// LLVMScalableVectorType. 
+//===----------------------------------------------------------------------===// + LLVMScalableVectorType LLVMScalableVectorType::get(Type elementType, unsigned minNumElements) { assert(elementType && "expected non-null subtype"); @@ -303,10 +299,27 @@ LLVMScalableVectorType::getChecked(Location loc, Type elementType, return Base::getChecked(loc, elementType, minNumElements); } +Type LLVMScalableVectorType::getElementType() { + return static_cast(impl)->elementType; +} + unsigned LLVMScalableVectorType::getMinNumElements() { return getImpl()->numElements; } +bool LLVMScalableVectorType::isValidElementType(Type type) { + if (auto intType = type.dyn_cast()) + return intType.isSignless(); + + return isCompatibleFloatingPointType(type) || type.isa(); +} + +LogicalResult LLVMScalableVectorType::verifyConstructionInvariants( + Location loc, Type elementType, unsigned numElements) { + return verifyVectorConstructionInvariants( + loc, elementType, numElements); +} + //===----------------------------------------------------------------------===// // Utility functions. //===----------------------------------------------------------------------===// @@ -316,6 +329,10 @@ bool mlir::LLVM::isCompatibleType(Type type) { if (auto intType = type.dyn_cast()) return intType.isSignless(); + // 1D vector types are compatible if their element types are. 
+ if (auto vecType = type.dyn_cast()) + return vecType.getRank() == 1 && isCompatibleType(vecType.getElementType()); + // clang-format off return type.isa< BFloat16Type, @@ -331,7 +348,8 @@ bool mlir::LLVM::isCompatibleType(Type type) { LLVMPointerType, LLVMStructType, LLVMTokenType, - LLVMVectorType, + LLVMFixedVectorType, + LLVMScalableVectorType, LLVMVoidType, LLVMX86FP80Type, LLVMX86MMXType @@ -344,6 +362,55 @@ bool mlir::LLVM::isCompatibleFloatingPointType(Type type) { LLVMFP128Type, LLVMPPCFP128Type, LLVMX86FP80Type>(); } +bool mlir::LLVM::isCompatibleVectorType(Type type) { + if (type.isa()) + return true; + + if (auto vecType = type.dyn_cast()) { + if (vecType.getRank() != 1) + return false; + Type elementType = vecType.getElementType(); + if (auto intType = elementType.dyn_cast()) + return intType.isSignless(); + return elementType + .isa(); + } + return false; +} + +Type mlir::LLVM::getVectorElementType(Type type) { + return llvm::TypeSwitch(type) + .Case( + [](auto ty) { return ty.getElementType(); }) + .Default([](Type) -> Type { + llvm_unreachable("incompatible with LLVM vector type"); + }); +} + +llvm::ElementCount mlir::LLVM::getVectorNumElements(Type type) { + return llvm::TypeSwitch(type) + .Case([](auto ty) { + return llvm::ElementCount::getFixed(ty.getNumElements()); + }) + .Case([](LLVMScalableVectorType ty) { + return llvm::ElementCount::getScalable(ty.getMinNumElements()); + }) + .Default([](Type) -> llvm::ElementCount { + llvm_unreachable("incompatible with LLVM vector type"); + }); +} + +Type mlir::LLVM::getFixedVectorType(Type elementType, unsigned numElements) { + bool useLLVM = LLVMFixedVectorType::isValidElementType(elementType); + bool useBuiltIn = VectorType::isValidElementType(elementType); + (void)useBuiltIn; + assert((useLLVM ^ useBuiltIn) && "expected LLVM-compatible fixed-vector type " + "to be either builtin or LLVM dialect type"); + if (useLLVM) + return LLVMFixedVectorType::get(elementType, numElements); + return 
VectorType::get(numElements, elementType); +} + llvm::TypeSize mlir::LLVM::getPrimitiveTypeSizeInBits(Type type) { assert(isCompatibleType(type) && "expected a type compatible with the LLVM dialect"); @@ -360,15 +427,19 @@ llvm::TypeSize mlir::LLVM::getPrimitiveTypeSizeInBits(Type type) { .Case([](Type) { return llvm::TypeSize::Fixed(80); }) .Case( [](Type) { return llvm::TypeSize::Fixed(128); }) - .Case([](LLVMVectorType t) { + .Case([](LLVMFixedVectorType t) { + llvm::TypeSize elementSize = + getPrimitiveTypeSizeInBits(t.getElementType()); + return llvm::TypeSize(elementSize.getFixedSize() * t.getNumElements(), + elementSize.isScalable()); + }) + .Case([](VectorType t) { + assert(isCompatibleVectorType(t) && + "unexpected incompatible with LLVM vector type"); llvm::TypeSize elementSize = getPrimitiveTypeSizeInBits(t.getElementType()); - llvm::ElementCount elementCount = t.getElementCount(); - assert(!elementSize.isScalable() && - "vector type should have fixed-width elements"); - return llvm::TypeSize(elementSize.getFixedSize() * - elementCount.getKnownMinValue(), - elementCount.isScalable()); + return llvm::TypeSize(elementSize.getFixedSize() * t.getNumElements(), + elementSize.isScalable()); }) .Default([](Type ty) { assert((ty.isa()) { - auto numElements = vectorType.getElementCount(); + if (LLVM::isCompatibleVectorType(type)) { + auto numElements = LLVM::getVectorNumElements(type); if (numElements.isScalable()) { emitError(unknownLoc) << "scalable vectors not supported"; return nullptr; } - Type elementType = getStdTypeForAttr(vectorType.getElementType()); + Type elementType = getStdTypeForAttr(LLVM::getVectorElementType(type)); if (!elementType) return nullptr; return VectorType::get(numElements.getKnownMinValue(), elementType); @@ -200,16 +200,16 @@ Type Importer::getStdTypeForAttr(Type type) { // If the innermost type is a vector, use the multi-dimensional vector as // attribute type. 
- if (auto vectorType = - arrayType.getElementType().dyn_cast()) { - auto numElements = vectorType.getElementCount(); + if (LLVM::isCompatibleVectorType(arrayType.getElementType())) { + auto numElements = LLVM::getVectorNumElements(arrayType.getElementType()); if (numElements.isScalable()) { emitError(unknownLoc) << "scalable vectors not supported"; return nullptr; } shape.push_back(numElements.getKnownMinValue()); - Type elementType = getStdTypeForAttr(vectorType.getElementType()); + Type elementType = getStdTypeForAttr( + LLVM::getVectorElementType(arrayType.getElementType())); if (!elementType) return nullptr; return VectorType::get(shape, elementType); diff --git a/mlir/lib/Target/LLVMIR/TypeTranslation.cpp b/mlir/lib/Target/LLVMIR/TypeTranslation.cpp index bb773f09cf2c11..50f4836fba28fc 100644 --- a/mlir/lib/Target/LLVMIR/TypeTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/TypeTranslation.cpp @@ -72,7 +72,8 @@ class TypeToLLVMIRTranslatorImpl { }) .Case( + LLVM::LLVMFixedVectorType, LLVM::LLVMScalableVectorType, + VectorType>( [this](auto type) { return this->translate(type); }) .Default([](Type t) -> llvm::Type * { llvm_unreachable("unknown LLVM dialect type"); @@ -132,6 +133,14 @@ class TypeToLLVMIRTranslatorImpl { return structType; } + /// Translates the given built-in vector type compatible with LLVM. + llvm::Type *translate(VectorType type) { + assert(LLVM::isCompatibleVectorType(type) && + "expected compatible with LLVM vector type"); + return llvm::FixedVectorType::get(translateType(type.getElementType()), + type.getNumElements()); + } + /// Translates the given fixed-vector type. llvm::Type *translate(LLVM::LLVMFixedVectorType type) { return llvm::FixedVectorType::get(translateType(type.getElementType()), @@ -285,8 +294,8 @@ class TypeFromLLVMIRTranslatorImpl { /// Translates the given fixed-vector type. 
Type translate(llvm::FixedVectorType *type) { - return LLVM::LLVMFixedVectorType::get(translateType(type->getElementType()), - type->getNumElements()); + return LLVM::getFixedVectorType(translateType(type->getElementType()), + type->getNumElements()); } /// Translates the given scalable-vector type. diff --git a/mlir/test/Conversion/ArmNeonToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/ArmNeonToLLVM/convert-to-llvm.mlir index fe56052fe73429..d95abf4dd50eca 100644 --- a/mlir/test/Conversion/ArmNeonToLLVM/convert-to-llvm.mlir +++ b/mlir/test/Conversion/ArmNeonToLLVM/convert-to-llvm.mlir @@ -3,17 +3,17 @@ // CHECK-LABEL: arm_neon_smull func @arm_neon_smull(%a: vector<8xi8>, %b: vector<8xi8>) -> (vector<8xi16>, vector<4xi32>, vector<2xi64>) { - // CHECK: arm_neon.smull{{.*}}: (!llvm.vec<8 x i8>, !llvm.vec<8 x i8>) -> !llvm.vec<8 x i16> + // CHECK: arm_neon.smull{{.*}}: (vector<8xi8>, vector<8xi8>) -> vector<8xi16> %0 = arm_neon.smull %a, %b : vector<8xi8> to vector<8xi16> %00 = vector.extract_strided_slice %0 {offsets = [3], sizes = [4], strides = [1]}: vector<8xi16> to vector<4xi16> - // CHECK: arm_neon.smull{{.*}}: (!llvm.vec<4 x i16>, !llvm.vec<4 x i16>) -> !llvm.vec<4 x i32> + // CHECK: arm_neon.smull{{.*}}: (vector<4xi16>, vector<4xi16>) -> vector<4xi32> %1 = arm_neon.smull %00, %00 : vector<4xi16> to vector<4xi32> %11 = vector.extract_strided_slice %1 {offsets = [1], sizes = [2], strides = [1]}: vector<4xi32> to vector<2xi32> - // CHECK: arm_neon.smull{{.*}}: (!llvm.vec<2 x i32>, !llvm.vec<2 x i32>) -> !llvm.vec<2 x i64> + // CHECK: arm_neon.smull{{.*}}: (vector<2xi32>, vector<2xi32>) -> vector<2xi64> %2 = arm_neon.smull %11, %11 : vector<2xi32> to vector<2xi64> return %0, %1, %2 : vector<8xi16>, vector<4xi32>, vector<2xi64> diff --git a/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir index 0e8dfab788550b..5a1a3eb6209b6f 100644 --- 
a/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir @@ -13,7 +13,7 @@ spv.func @iadd_scalar(%arg0: i32, %arg1: i32) "None" { // CHECK-LABEL: @iadd_vector spv.func @iadd_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) "None" { - // CHECK: llvm.add %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.add %{{.*}}, %{{.*}} : vector<4xi64> %0 = spv.IAdd %arg0, %arg1 : vector<4xi64> spv.Return } @@ -31,7 +31,7 @@ spv.func @isub_scalar(%arg0: i8, %arg1: i8) "None" { // CHECK-LABEL: @isub_vector spv.func @isub_vector(%arg0: vector<2xi16>, %arg1: vector<2xi16>) "None" { - // CHECK: llvm.sub %{{.*}}, %{{.*}} : !llvm.vec<2 x i16> + // CHECK: llvm.sub %{{.*}}, %{{.*}} : vector<2xi16> %0 = spv.ISub %arg0, %arg1 : vector<2xi16> spv.Return } @@ -49,7 +49,7 @@ spv.func @imul_scalar(%arg0: i32, %arg1: i32) "None" { // CHECK-LABEL: @imul_vector spv.func @imul_vector(%arg0: vector<3xi32>, %arg1: vector<3xi32>) "None" { - // CHECK: llvm.mul %{{.*}}, %{{.*}} : !llvm.vec<3 x i32> + // CHECK: llvm.mul %{{.*}}, %{{.*}} : vector<3xi32> %0 = spv.IMul %arg0, %arg1 : vector<3xi32> spv.Return } @@ -67,7 +67,7 @@ spv.func @fadd_scalar(%arg0: f16, %arg1: f16) "None" { // CHECK-LABEL: @fadd_vector spv.func @fadd_vector(%arg0: vector<4xf32>, %arg1: vector<4xf32>) "None" { - // CHECK: llvm.fadd %{{.*}}, %{{.*}} : !llvm.vec<4 x f32> + // CHECK: llvm.fadd %{{.*}}, %{{.*}} : vector<4xf32> %0 = spv.FAdd %arg0, %arg1 : vector<4xf32> spv.Return } @@ -85,7 +85,7 @@ spv.func @fsub_scalar(%arg0: f32, %arg1: f32) "None" { // CHECK-LABEL: @fsub_vector spv.func @fsub_vector(%arg0: vector<2xf32>, %arg1: vector<2xf32>) "None" { - // CHECK: llvm.fsub %{{.*}}, %{{.*}} : !llvm.vec<2 x f32> + // CHECK: llvm.fsub %{{.*}}, %{{.*}} : vector<2xf32> %0 = spv.FSub %arg0, %arg1 : vector<2xf32> spv.Return } @@ -103,7 +103,7 @@ spv.func @fdiv_scalar(%arg0: f32, %arg1: f32) "None" { // CHECK-LABEL: @fdiv_vector spv.func @fdiv_vector(%arg0: 
vector<3xf64>, %arg1: vector<3xf64>) "None" { - // CHECK: llvm.fdiv %{{.*}}, %{{.*}} : !llvm.vec<3 x f64> + // CHECK: llvm.fdiv %{{.*}}, %{{.*}} : vector<3xf64> %0 = spv.FDiv %arg0, %arg1 : vector<3xf64> spv.Return } @@ -121,7 +121,7 @@ spv.func @fmul_scalar(%arg0: f32, %arg1: f32) "None" { // CHECK-LABEL: @fmul_vector spv.func @fmul_vector(%arg0: vector<2xf32>, %arg1: vector<2xf32>) "None" { - // CHECK: llvm.fmul %{{.*}}, %{{.*}} : !llvm.vec<2 x f32> + // CHECK: llvm.fmul %{{.*}}, %{{.*}} : vector<2xf32> %0 = spv.FMul %arg0, %arg1 : vector<2xf32> spv.Return } @@ -139,7 +139,7 @@ spv.func @frem_scalar(%arg0: f32, %arg1: f32) "None" { // CHECK-LABEL: @frem_vector spv.func @frem_vector(%arg0: vector<3xf64>, %arg1: vector<3xf64>) "None" { - // CHECK: llvm.frem %{{.*}}, %{{.*}} : !llvm.vec<3 x f64> + // CHECK: llvm.frem %{{.*}}, %{{.*}} : vector<3xf64> %0 = spv.FRem %arg0, %arg1 : vector<3xf64> spv.Return } @@ -157,7 +157,7 @@ spv.func @fneg_scalar(%arg: f64) "None" { // CHECK-LABEL: @fneg_vector spv.func @fneg_vector(%arg: vector<2xf32>) "None" { - // CHECK: llvm.fneg %{{.*}} : !llvm.vec<2 x f32> + // CHECK: llvm.fneg %{{.*}} : vector<2xf32> %0 = spv.FNegate %arg : vector<2xf32> spv.Return } @@ -175,7 +175,7 @@ spv.func @udiv_scalar(%arg0: i32, %arg1: i32) "None" { // CHECK-LABEL: @udiv_vector spv.func @udiv_vector(%arg0: vector<3xi64>, %arg1: vector<3xi64>) "None" { - // CHECK: llvm.udiv %{{.*}}, %{{.*}} : !llvm.vec<3 x i64> + // CHECK: llvm.udiv %{{.*}}, %{{.*}} : vector<3xi64> %0 = spv.UDiv %arg0, %arg1 : vector<3xi64> spv.Return } @@ -193,7 +193,7 @@ spv.func @umod_scalar(%arg0: i32, %arg1: i32) "None" { // CHECK-LABEL: @umod_vector spv.func @umod_vector(%arg0: vector<3xi64>, %arg1: vector<3xi64>) "None" { - // CHECK: llvm.urem %{{.*}}, %{{.*}} : !llvm.vec<3 x i64> + // CHECK: llvm.urem %{{.*}}, %{{.*}} : vector<3xi64> %0 = spv.UMod %arg0, %arg1 : vector<3xi64> spv.Return } @@ -211,7 +211,7 @@ spv.func @sdiv_scalar(%arg0: i16, %arg1: i16) "None" { // CHECK-LABEL: 
@sdiv_vector spv.func @sdiv_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.sdiv %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.sdiv %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.SDiv %arg0, %arg1 : vector<2xi64> spv.Return } @@ -229,7 +229,7 @@ spv.func @srem_scalar(%arg0: i32, %arg1: i32) "None" { // CHECK-LABEL: @srem_vector spv.func @srem_vector(%arg0: vector<4xi32>, %arg1: vector<4xi32>) "None" { - // CHECK: llvm.srem %{{.*}}, %{{.*}} : !llvm.vec<4 x i32> + // CHECK: llvm.srem %{{.*}}, %{{.*}} : vector<4xi32> %0 = spv.SRem %arg0, %arg1 : vector<4xi32> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir index db1ac3a6d4d0e8..488dd7ea37d8c4 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir @@ -13,7 +13,7 @@ spv.func @bitcount_scalar(%arg0: i16) "None" { // CHECK-LABEL: @bitcount_vector spv.func @bitcount_vector(%arg0: vector<3xi32>) "None" { - // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (!llvm.vec<3 x i32>) -> !llvm.vec<3 x i32> + // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (vector<3xi32>) -> vector<3xi32> %0 = spv.BitCount %arg0: vector<3xi32> spv.Return } @@ -31,7 +31,7 @@ spv.func @bitreverse_scalar(%arg0: i64) "None" { // CHECK-LABEL: @bitreverse_vector spv.func @bitreverse_vector(%arg0: vector<4xi32>) "None" { - // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (!llvm.vec<4 x i32>) -> !llvm.vec<4 x i32> + // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (vector<4xi32>) -> vector<4xi32> %0 = spv.BitReverse %arg0: vector<4xi32> spv.Return } @@ -90,26 +90,26 @@ spv.func @bitfield_insert_scalar_greater_bit_width(%base: i16, %insert: i16, %of } // CHECK-LABEL: @bitfield_insert_vector -// CHECK-SAME: %[[BASE:.*]]: !llvm.vec<2 x i32>, %[[INSERT:.*]]: !llvm.vec<2 x i32>, %[[OFFSET:.*]]: i32, %[[COUNT:.*]]: i32 +// CHECK-SAME: %[[BASE:.*]]: vector<2xi32>, %[[INSERT:.*]]: 
vector<2xi32>, %[[OFFSET:.*]]: i32, %[[COUNT:.*]]: i32 spv.func @bitfield_insert_vector(%base: vector<2xi32>, %insert: vector<2xi32>, %offset: i32, %count: i32) "None" { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : vector<2xi32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : i32] : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : i32] : vector<2xi32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : i32] : !llvm.vec<2 x i32> - // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : i32] : vector<2xi32> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : vector<2xi32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : i32] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : i32] : vector<2xi32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : i32] : !llvm.vec<2 x i32> - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm.vec<2 x i32> - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm.vec<2 x i32> - // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.vec<2 x i32> - // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[OFFSET_V2]] : !llvm.vec<2 x i32> - // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm.vec<2 x i32> - // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm.vec<2 x i32> - // CHECK: %[[SHIFTED_INSERT:.*]] 
= llvm.shl %[[INSERT]], %[[OFFSET_V2]] : !llvm.vec<2 x i32> - // CHECK: llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : i32] : vector<2xi32> + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : vector<2xi32> + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : vector<2xi32> + // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : vector<2xi32> + // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[OFFSET_V2]] : vector<2xi32> + // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : vector<2xi32> + // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : vector<2xi32> + // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[OFFSET_V2]] : vector<2xi32> + // CHECK: llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : vector<2xi32> %0 = spv.BitFieldInsert %base, %insert, %offset, %count : vector<2xi32>, i32, i32 spv.Return } @@ -162,24 +162,24 @@ spv.func @bitfield_sextract_scalar_greater_bit_width(%base: i32, %offset: i64, % } // CHECK-LABEL: @bitfield_sextract_vector -// CHECK-SAME: %[[BASE:.*]]: !llvm.vec<2 x i32>, %[[OFFSET:.*]]: i32, %[[COUNT:.*]]: i32 +// CHECK-SAME: %[[BASE:.*]]: vector<2xi32>, %[[OFFSET:.*]]: i32, %[[COUNT:.*]]: i32 spv.func @bitfield_sextract_vector(%base: vector<2xi32>, %offset: i32, %count: i32) "None" { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : vector<2xi32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : i32] : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : i32] : vector<2xi32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : i32] : !llvm.vec<2 x i32> - // CHECK: 
%[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : i32] : vector<2xi32> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : vector<2xi32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : i32] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : i32] : vector<2xi32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : i32] : !llvm.vec<2 x i32> - // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(dense<32> : vector<2xi32>) : !llvm.vec<2 x i32> - // CHECK: %[[T0:.*]] = llvm.add %[[COUNT_V2]], %[[OFFSET_V2]] : !llvm.vec<2 x i32> - // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm.vec<2 x i32> - // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm.vec<2 x i32> - // CHECK: %[[T2:.*]] = llvm.add %[[OFFSET_V2]], %[[T1]] : !llvm.vec<2 x i32> - // CHECK: llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : i32] : vector<2xi32> + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(dense<32> : vector<2xi32>) : vector<2xi32> + // CHECK: %[[T0:.*]] = llvm.add %[[COUNT_V2]], %[[OFFSET_V2]] : vector<2xi32> + // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : vector<2xi32> + // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : vector<2xi32> + // CHECK: %[[T2:.*]] = llvm.add %[[OFFSET_V2]], %[[T1]] : vector<2xi32> + // CHECK: llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : vector<2xi32> %0 = spv.BitFieldSExtract %base, %offset, %count : vector<2xi32>, i32, i32 spv.Return } @@ -228,23 +228,23 @@ spv.func @bitfield_uextract_scalar_greater_bit_width(%base: i8, %offset: i16, %c } // CHECK-LABEL: @bitfield_uextract_vector -// CHECK-SAME: %[[BASE:.*]]: 
!llvm.vec<2 x i32>, %[[OFFSET:.*]]: i32, %[[COUNT:.*]]: i32 +// CHECK-SAME: %[[BASE:.*]]: vector<2xi32>, %[[OFFSET:.*]]: i32, %[[COUNT:.*]]: i32 spv.func @bitfield_uextract_vector(%base: vector<2xi32>, %offset: i32, %count: i32) "None" { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : vector<2xi32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : i32] : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : i32] : vector<2xi32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : i32] : !llvm.vec<2 x i32> - // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : i32] : vector<2xi32> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : vector<2xi32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : i32] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : i32] : vector<2xi32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : i32] : !llvm.vec<2 x i32> - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm.vec<2 x i32> - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm.vec<2 x i32> - // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.vec<2 x i32> - // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[OFFSET_V2]] : !llvm.vec<2 x i32> - // CHECK: llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V2:.*]] = 
llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : i32] : vector<2xi32> + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : vector<2xi32> + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : vector<2xi32> + // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : vector<2xi32> + // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[OFFSET_V2]] : vector<2xi32> + // CHECK: llvm.and %[[SHIFTED_BASE]], %[[MASK]] : vector<2xi32> %0 = spv.BitFieldUExtract %base, %offset, %count : vector<2xi32>, i32, i32 spv.Return } @@ -262,7 +262,7 @@ spv.func @bitwise_and_scalar(%arg0: i32, %arg1: i32) "None" { // CHECK-LABEL: @bitwise_and_vector spv.func @bitwise_and_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) "None" { - // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.and %{{.*}}, %{{.*}} : vector<4xi64> %0 = spv.BitwiseAnd %arg0, %arg1 : vector<4xi64> spv.Return } @@ -280,7 +280,7 @@ spv.func @bitwise_or_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @bitwise_or_vector spv.func @bitwise_or_vector(%arg0: vector<3xi8>, %arg1: vector<3xi8>) "None" { - // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm.vec<3 x i8> + // CHECK: llvm.or %{{.*}}, %{{.*}} : vector<3xi8> %0 = spv.BitwiseOr %arg0, %arg1 : vector<3xi8> spv.Return } @@ -298,7 +298,7 @@ spv.func @bitwise_xor_scalar(%arg0: i32, %arg1: i32) "None" { // CHECK-LABEL: @bitwise_xor_vector spv.func @bitwise_xor_vector(%arg0: vector<2xi16>, %arg1: vector<2xi16>) "None" { - // CHECK: llvm.xor %{{.*}}, %{{.*}} : !llvm.vec<2 x i16> + // CHECK: llvm.xor %{{.*}}, %{{.*}} : vector<2xi16> %0 = spv.BitwiseXor %arg0, %arg1 : vector<2xi16> spv.Return } @@ -317,8 +317,8 @@ spv.func @not_scalar(%arg0: i32) "None" { // CHECK-LABEL: @not_vector spv.func @not_vector(%arg0: vector<2xi16>) "None" { - // CHECK: %[[CONST:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi16>) : !llvm.vec<2 x i16> - // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm.vec<2 x i16> + // CHECK: 
%[[CONST:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi16>) : vector<2xi16> + // CHECK: llvm.xor %{{.*}}, %[[CONST]] : vector<2xi16> %0 = spv.Not %arg0 : vector<2xi16> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir index 8f67a5fcab7074..bfabc8c7d0c679 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir @@ -13,28 +13,28 @@ spv.func @bitcast_float_to_integer_scalar(%arg0 : f32) "None" { // CHECK-LABEL: @bitcast_float_to_integer_vector spv.func @bitcast_float_to_integer_vector(%arg0 : vector<3xf32>) "None" { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.vec<3 x f32> to !llvm.vec<3 x i32> + // CHECK: {{.*}} = llvm.bitcast {{.*}} : vector<3xf32> to vector<3xi32> %0 = spv.Bitcast %arg0: vector<3xf32> to vector<3xi32> spv.Return } // CHECK-LABEL: @bitcast_vector_to_scalar spv.func @bitcast_vector_to_scalar(%arg0 : vector<2xf32>) "None" { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.vec<2 x f32> to i64 + // CHECK: {{.*}} = llvm.bitcast {{.*}} : vector<2xf32> to i64 %0 = spv.Bitcast %arg0: vector<2xf32> to i64 spv.Return } // CHECK-LABEL: @bitcast_scalar_to_vector spv.func @bitcast_scalar_to_vector(%arg0 : f64) "None" { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : f64 to !llvm.vec<2 x i32> + // CHECK: {{.*}} = llvm.bitcast {{.*}} : f64 to vector<2xi32> %0 = spv.Bitcast %arg0: f64 to vector<2xi32> spv.Return } // CHECK-LABEL: @bitcast_vector_to_vector spv.func @bitcast_vector_to_vector(%arg0 : vector<4xf32>) "None" { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.vec<4 x f32> to !llvm.vec<2 x i64> + // CHECK: {{.*}} = llvm.bitcast {{.*}} : vector<4xf32> to vector<2xi64> %0 = spv.Bitcast %arg0: vector<4xf32> to vector<2xi64> spv.Return } @@ -59,7 +59,7 @@ spv.func @convert_float_to_signed_scalar(%arg0: f32) "None" { // CHECK-LABEL: @convert_float_to_signed_vector spv.func 
@convert_float_to_signed_vector(%arg0: vector<2xf32>) "None" { - // CHECK: llvm.fptosi %{{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x i32> + // CHECK: llvm.fptosi %{{.*}} : vector<2xf32> to vector<2xi32> %0 = spv.ConvertFToS %arg0: vector<2xf32> to vector<2xi32> spv.Return } @@ -77,7 +77,7 @@ spv.func @convert_float_to_unsigned_scalar(%arg0: f32) "None" { // CHECK-LABEL: @convert_float_to_unsigned_vector spv.func @convert_float_to_unsigned_vector(%arg0: vector<2xf32>) "None" { - // CHECK: llvm.fptoui %{{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x i32> + // CHECK: llvm.fptoui %{{.*}} : vector<2xf32> to vector<2xi32> %0 = spv.ConvertFToU %arg0: vector<2xf32> to vector<2xi32> spv.Return } @@ -95,7 +95,7 @@ spv.func @convert_signed_to_float_scalar(%arg0: i32) "None" { // CHECK-LABEL: @convert_signed_to_float_vector spv.func @convert_signed_to_float_vector(%arg0: vector<3xi32>) "None" { - // CHECK: llvm.sitofp %{{.*}} : !llvm.vec<3 x i32> to !llvm.vec<3 x f32> + // CHECK: llvm.sitofp %{{.*}} : vector<3xi32> to vector<3xf32> %0 = spv.ConvertSToF %arg0: vector<3xi32> to vector<3xf32> spv.Return } @@ -113,7 +113,7 @@ spv.func @convert_unsigned_to_float_scalar(%arg0: i32) "None" { // CHECK-LABEL: @convert_unsigned_to_float_vector spv.func @convert_unsigned_to_float_vector(%arg0: vector<3xi32>) "None" { - // CHECK: llvm.uitofp %{{.*}} : !llvm.vec<3 x i32> to !llvm.vec<3 x f32> + // CHECK: llvm.uitofp %{{.*}} : vector<3xi32> to vector<3xf32> %0 = spv.ConvertUToF %arg0: vector<3xi32> to vector<3xf32> spv.Return } @@ -134,10 +134,10 @@ spv.func @fconvert_scalar(%arg0: f32, %arg1: f64) "None" { // CHECK-LABEL: @fconvert_vector spv.func @fconvert_vector(%arg0: vector<2xf32>, %arg1: vector<2xf64>) "None" { - // CHECK: llvm.fpext %{{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x f64> + // CHECK: llvm.fpext %{{.*}} : vector<2xf32> to vector<2xf64> %0 = spv.FConvert %arg0: vector<2xf32> to vector<2xf64> - // CHECK: llvm.fptrunc %{{.*}} : !llvm.vec<2 x f64> to !llvm.vec<2 x f32> + // CHECK: 
llvm.fptrunc %{{.*}} : vector<2xf64> to vector<2xf32> %1 = spv.FConvert %arg1: vector<2xf64> to vector<2xf32> spv.Return } @@ -158,10 +158,10 @@ spv.func @sconvert_scalar(%arg0: i32, %arg1: i64) "None" { // CHECK-LABEL: @sconvert_vector spv.func @sconvert_vector(%arg0: vector<3xi32>, %arg1: vector<3xi64>) "None" { - // CHECK: llvm.sext %{{.*}} : !llvm.vec<3 x i32> to !llvm.vec<3 x i64> + // CHECK: llvm.sext %{{.*}} : vector<3xi32> to vector<3xi64> %0 = spv.SConvert %arg0: vector<3xi32> to vector<3xi64> - // CHECK: llvm.trunc %{{.*}} : !llvm.vec<3 x i64> to !llvm.vec<3 x i32> + // CHECK: llvm.trunc %{{.*}} : vector<3xi64> to vector<3xi32> %1 = spv.SConvert %arg1: vector<3xi64> to vector<3xi32> spv.Return } @@ -182,10 +182,10 @@ spv.func @uconvert_scalar(%arg0: i32, %arg1: i64) "None" { // CHECK-LABEL: @uconvert_vector spv.func @uconvert_vector(%arg0: vector<3xi32>, %arg1: vector<3xi64>) "None" { - // CHECK: llvm.zext %{{.*}} : !llvm.vec<3 x i32> to !llvm.vec<3 x i64> + // CHECK: llvm.zext %{{.*}} : vector<3xi32> to vector<3xi64> %0 = spv.UConvert %arg0: vector<3xi32> to vector<3xi64> - // CHECK: llvm.trunc %{{.*}} : !llvm.vec<3 x i64> to !llvm.vec<3 x i32> + // CHECK: llvm.trunc %{{.*}} : vector<3xi64> to vector<3xi32> %1 = spv.UConvert %arg1: vector<3xi64> to vector<3xi32> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir index 632136c5ede03c..355e313d9f0c06 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir @@ -13,7 +13,7 @@ spv.func @i_equal_scalar(%arg0: i32, %arg1: i32) "None" { // CHECK-LABEL: @i_equal_vector spv.func @i_equal_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) "None" { - // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : vector<4xi64> %0 = spv.IEqual %arg0, %arg1 : vector<4xi64> spv.Return } @@ -31,7 
+31,7 @@ spv.func @i_not_equal_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @i_not_equal_vector spv.func @i_not_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.INotEqual %arg0, %arg1 : vector<2xi64> spv.Return } @@ -49,7 +49,7 @@ spv.func @s_greater_than_equal_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @s_greater_than_equal_vector spv.func @s_greater_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "sge" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "sge" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.SGreaterThanEqual %arg0, %arg1 : vector<2xi64> spv.Return } @@ -67,7 +67,7 @@ spv.func @s_greater_than_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @s_greater_than_vector spv.func @s_greater_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "sgt" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "sgt" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.SGreaterThan %arg0, %arg1 : vector<2xi64> spv.Return } @@ -85,7 +85,7 @@ spv.func @s_less_than_equal_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @s_less_than_equal_vector spv.func @s_less_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "sle" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "sle" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.SLessThanEqual %arg0, %arg1 : vector<2xi64> spv.Return } @@ -103,7 +103,7 @@ spv.func @s_less_than_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @s_less_than_vector spv.func @s_less_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "slt" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "slt" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.SLessThan %arg0, %arg1 : vector<2xi64> spv.Return } @@ 
-121,7 +121,7 @@ spv.func @u_greater_than_equal_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @u_greater_than_equal_vector spv.func @u_greater_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "uge" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "uge" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.UGreaterThanEqual %arg0, %arg1 : vector<2xi64> spv.Return } @@ -139,7 +139,7 @@ spv.func @u_greater_than_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @u_greater_than_vector spv.func @u_greater_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "ugt" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "ugt" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.UGreaterThan %arg0, %arg1 : vector<2xi64> spv.Return } @@ -157,7 +157,7 @@ spv.func @u_less_than_equal_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @u_less_than_equal_vector spv.func @u_less_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "ule" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "ule" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.ULessThanEqual %arg0, %arg1 : vector<2xi64> spv.Return } @@ -175,7 +175,7 @@ spv.func @u_less_than_scalar(%arg0: i64, %arg1: i64) "None" { // CHECK-LABEL: @u_less_than_vector spv.func @u_less_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) "None" { - // CHECK: llvm.icmp "ult" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> + // CHECK: llvm.icmp "ult" %{{.*}}, %{{.*}} : vector<2xi64> %0 = spv.ULessThan %arg0, %arg1 : vector<2xi64> spv.Return } @@ -193,7 +193,7 @@ spv.func @f_ord_equal_scalar(%arg0: f32, %arg1: f32) "None" { // CHECK-LABEL: @f_ord_equal_vector spv.func @f_ord_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) "None" { - // CHECK: llvm.fcmp "oeq" %{{.*}}, %{{.*}} : !llvm.vec<4 x f64> + // CHECK: llvm.fcmp "oeq" %{{.*}}, %{{.*}} : vector<4xf64> %0 = spv.FOrdEqual %arg0, %arg1 : vector<4xf64> 
spv.Return } @@ -211,7 +211,7 @@ spv.func @f_ord_greater_than_equal_scalar(%arg0: f64, %arg1: f64) "None" { // CHECK-LABEL: @f_ord_greater_than_equal_vector spv.func @f_ord_greater_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) "None" { - // CHECK: llvm.fcmp "oge" %{{.*}}, %{{.*}} : !llvm.vec<2 x f64> + // CHECK: llvm.fcmp "oge" %{{.*}}, %{{.*}} : vector<2xf64> %0 = spv.FOrdGreaterThanEqual %arg0, %arg1 : vector<2xf64> spv.Return } @@ -229,7 +229,7 @@ spv.func @f_ord_greater_than_scalar(%arg0: f64, %arg1: f64) "None" { // CHECK-LABEL: @f_ord_greater_than_vector spv.func @f_ord_greater_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) "None" { - // CHECK: llvm.fcmp "ogt" %{{.*}}, %{{.*}} : !llvm.vec<2 x f64> + // CHECK: llvm.fcmp "ogt" %{{.*}}, %{{.*}} : vector<2xf64> %0 = spv.FOrdGreaterThan %arg0, %arg1 : vector<2xf64> spv.Return } @@ -247,7 +247,7 @@ spv.func @f_ord_less_than_scalar(%arg0: f64, %arg1: f64) "None" { // CHECK-LABEL: @f_ord_less_than_vector spv.func @f_ord_less_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) "None" { - // CHECK: llvm.fcmp "olt" %{{.*}}, %{{.*}} : !llvm.vec<2 x f64> + // CHECK: llvm.fcmp "olt" %{{.*}}, %{{.*}} : vector<2xf64> %0 = spv.FOrdLessThan %arg0, %arg1 : vector<2xf64> spv.Return } @@ -265,7 +265,7 @@ spv.func @f_ord_less_than_equal_scalar(%arg0: f64, %arg1: f64) "None" { // CHECK-LABEL: @f_ord_less_than_equal_vector spv.func @f_ord_less_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) "None" { - // CHECK: llvm.fcmp "ole" %{{.*}}, %{{.*}} : !llvm.vec<2 x f64> + // CHECK: llvm.fcmp "ole" %{{.*}}, %{{.*}} : vector<2xf64> %0 = spv.FOrdLessThanEqual %arg0, %arg1 : vector<2xf64> spv.Return } @@ -283,7 +283,7 @@ spv.func @f_ord_not_equal_scalar(%arg0: f32, %arg1: f32) "None" { // CHECK-LABEL: @f_ord_not_equal_vector spv.func @f_ord_not_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) "None" { - // CHECK: llvm.fcmp "one" %{{.*}}, %{{.*}} : !llvm.vec<4 x f64> + // CHECK: llvm.fcmp 
"one" %{{.*}}, %{{.*}} : vector<4xf64> %0 = spv.FOrdNotEqual %arg0, %arg1 : vector<4xf64> spv.Return } @@ -301,7 +301,7 @@ spv.func @f_unord_equal_scalar(%arg0: f32, %arg1: f32) "None" { // CHECK-LABEL: @f_unord_equal_vector spv.func @f_unord_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) "None" { - // CHECK: llvm.fcmp "ueq" %{{.*}}, %{{.*}} : !llvm.vec<4 x f64> + // CHECK: llvm.fcmp "ueq" %{{.*}}, %{{.*}} : vector<4xf64> %0 = spv.FUnordEqual %arg0, %arg1 : vector<4xf64> spv.Return } @@ -319,7 +319,7 @@ spv.func @f_unord_greater_than_equal_scalar(%arg0: f64, %arg1: f64) "None" { // CHECK-LABEL: @f_unord_greater_than_equal_vector spv.func @f_unord_greater_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) "None" { - // CHECK: llvm.fcmp "uge" %{{.*}}, %{{.*}} : !llvm.vec<2 x f64> + // CHECK: llvm.fcmp "uge" %{{.*}}, %{{.*}} : vector<2xf64> %0 = spv.FUnordGreaterThanEqual %arg0, %arg1 : vector<2xf64> spv.Return } @@ -337,7 +337,7 @@ spv.func @f_unord_greater_than_scalar(%arg0: f64, %arg1: f64) "None" { // CHECK-LABEL: @f_unord_greater_than_vector spv.func @f_unord_greater_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) "None" { - // CHECK: llvm.fcmp "ugt" %{{.*}}, %{{.*}} : !llvm.vec<2 x f64> + // CHECK: llvm.fcmp "ugt" %{{.*}}, %{{.*}} : vector<2xf64> %0 = spv.FUnordGreaterThan %arg0, %arg1 : vector<2xf64> spv.Return } @@ -355,7 +355,7 @@ spv.func @f_unord_less_than_scalar(%arg0: f64, %arg1: f64) "None" { // CHECK-LABEL: @f_unord_less_than_vector spv.func @f_unord_less_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) "None" { - // CHECK: llvm.fcmp "ult" %{{.*}}, %{{.*}} : !llvm.vec<2 x f64> + // CHECK: llvm.fcmp "ult" %{{.*}}, %{{.*}} : vector<2xf64> %0 = spv.FUnordLessThan %arg0, %arg1 : vector<2xf64> spv.Return } @@ -373,7 +373,7 @@ spv.func @f_unord_less_than_equal_scalar(%arg0: f64, %arg1: f64) "None" { // CHECK-LABEL: @f_unord_less_than_equal_vector spv.func @f_unord_less_than_equal_vector(%arg0: vector<2xf64>, %arg1: 
vector<2xf64>) "None" { - // CHECK: llvm.fcmp "ule" %{{.*}}, %{{.*}} : !llvm.vec<2 x f64> + // CHECK: llvm.fcmp "ule" %{{.*}}, %{{.*}} : vector<2xf64> %0 = spv.FUnordLessThanEqual %arg0, %arg1 : vector<2xf64> spv.Return } @@ -391,7 +391,7 @@ spv.func @f_unord_not_equal_scalar(%arg0: f32, %arg1: f32) "None" { // CHECK-LABEL: @f_unord_not_equal_vector spv.func @f_unord_not_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) "None" { - // CHECK: llvm.fcmp "une" %{{.*}}, %{{.*}} : !llvm.vec<4 x f64> + // CHECK: llvm.fcmp "une" %{{.*}}, %{{.*}} : vector<4xf64> %0 = spv.FUnordNotEqual %arg0, %arg1 : vector<4xf64> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir index 949aa0376d14b7..0ab431b3ac2409 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir @@ -15,9 +15,9 @@ spv.func @bool_constant_scalar() "None" { // CHECK-LABEL: @bool_constant_vector spv.func @bool_constant_vector() "None" { - // CHECK: llvm.mlir.constant(dense<[true, false]> : vector<2xi1>) : !llvm.vec<2 x i1> + // CHECK: llvm.mlir.constant(dense<[true, false]> : vector<2xi1>) : vector<2xi1> %0 = spv.constant dense<[true, false]> : vector<2xi1> - // CHECK: llvm.mlir.constant(dense : vector<3xi1>) : !llvm.vec<3 x i1> + // CHECK: llvm.mlir.constant(dense : vector<3xi1>) : vector<3xi1> %1 = spv.constant dense : vector<3xi1> spv.Return } @@ -35,11 +35,11 @@ spv.func @integer_constant_scalar() "None" { // CHECK-LABEL: @integer_constant_vector spv.func @integer_constant_vector() "None" { - // CHECK: llvm.mlir.constant(dense<[2, 3]> : vector<2xi32>) : !llvm.vec<2 x i32> + // CHECK: llvm.mlir.constant(dense<[2, 3]> : vector<2xi32>) : vector<2xi32> %0 = spv.constant dense<[2, 3]> : vector<2xi32> - // CHECK: llvm.mlir.constant(dense<-4> : vector<2xi32>) : !llvm.vec<2 x i32> + // CHECK: llvm.mlir.constant(dense<-4> : vector<2xi32>) : 
vector<2xi32> %1 = spv.constant dense<-4> : vector<2xsi32> - // CHECK: llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : !llvm.vec<3 x i32> + // CHECK: llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : vector<3xi32> %2 = spv.constant dense<[2, 3, 4]> : vector<3xui32> spv.Return } @@ -55,7 +55,7 @@ spv.func @float_constant_scalar() "None" { // CHECK-LABEL: @float_constant_vector spv.func @float_constant_vector() "None" { - // CHECK: llvm.mlir.constant(dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32>) : !llvm.vec<2 x f32> + // CHECK: llvm.mlir.constant(dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32>) : vector<2xf32> %0 = spv.constant dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/func-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/func-ops-to-llvm.mlir index 0928b0fa6c4a0f..e196a304a87210 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/func-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/func-ops-to-llvm.mlir @@ -54,7 +54,7 @@ spv.func @scalar_types(%arg0: i32, %arg1: i1, %arg2: f64, %arg3: f32) "None" { spv.Return } -// CHECK-LABEL: llvm.func @vector_types(%arg0: !llvm.vec<2 x i64>, %arg1: !llvm.vec<2 x i64>) -> !llvm.vec<2 x i64> +// CHECK-LABEL: llvm.func @vector_types(%arg0: vector<2xi64>, %arg1: vector<2xi64>) -> vector<2xi64> spv.func @vector_types(%arg0: vector<2xi64>, %arg1: vector<2xi64>) -> vector<2xi64> "None" { %0 = spv.IAdd %arg0, %arg1 : vector<2xi64> spv.ReturnValue %0 : vector<2xi64> @@ -65,12 +65,12 @@ spv.func @vector_types(%arg0: vector<2xi64>, %arg1: vector<2xi64>) -> vector<2xi //===----------------------------------------------------------------------===// // CHECK-LABEL: llvm.func @function_calls -// CHECK-SAME: %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: f64, %[[ARG3:.*]]: !llvm.vec<2 x i64>, %[[ARG4:.*]]: !llvm.vec<2 x f32> +// CHECK-SAME: %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: f64, %[[ARG3:.*]]: vector<2xi64>, %[[ARG4:.*]]: 
vector<2xf32> spv.func @function_calls(%arg0: i32, %arg1: i1, %arg2: f64, %arg3: vector<2xi64>, %arg4: vector<2xf32>) "None" { // CHECK: llvm.call @void_1() : () -> () - // CHECK: llvm.call @void_2(%[[ARG3]]) : (!llvm.vec<2 x i64>) -> () + // CHECK: llvm.call @void_2(%[[ARG3]]) : (vector<2xi64>) -> () // CHECK: llvm.call @value_scalar(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (i32, i1, f64) -> i32 - // CHECK: llvm.call @value_vector(%[[ARG3]], %[[ARG4]]) : (!llvm.vec<2 x i64>, !llvm.vec<2 x f32>) -> !llvm.vec<2 x f32> + // CHECK: llvm.call @value_vector(%[[ARG3]], %[[ARG4]]) : (vector<2xi64>, vector<2xf32>) -> vector<2xf32> spv.FunctionCall @void_1() : () -> () spv.FunctionCall @void_2(%arg3) : (vector<2xi64>) -> () %0 = spv.FunctionCall @value_scalar(%arg0, %arg1, %arg2) : (i32, i1, f64) -> i32 diff --git a/mlir/test/Conversion/SPIRVToLLVM/glsl-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/glsl-ops-to-llvm.mlir index 62d0dec74060fc..c9243f660fbaa0 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/glsl-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/glsl-ops-to-llvm.mlir @@ -8,7 +8,7 @@ spv.func @ceil(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.ceil"(%{{.*}}) : (f32) -> f32 %0 = spv.GLSL.Ceil %arg0 : f32 - // CHECK: "llvm.intr.ceil"(%{{.*}}) : (!llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.ceil"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.Ceil %arg1 : vector<3xf16> spv.Return } @@ -21,7 +21,7 @@ spv.func @ceil(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @cos(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.cos"(%{{.*}}) : (f32) -> f32 %0 = spv.GLSL.Cos %arg0 : f32 - // CHECK: "llvm.intr.cos"(%{{.*}}) : (!llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.cos"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.Cos %arg1 : vector<3xf16> spv.Return } @@ -34,7 +34,7 @@ spv.func @cos(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @exp(%arg0: f32, %arg1: vector<3xf16>) 
"None" { // CHECK: "llvm.intr.exp"(%{{.*}}) : (f32) -> f32 %0 = spv.GLSL.Exp %arg0 : f32 - // CHECK: "llvm.intr.exp"(%{{.*}}) : (!llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.exp"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.Exp %arg1 : vector<3xf16> spv.Return } @@ -47,7 +47,7 @@ spv.func @exp(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @fabs(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.fabs"(%{{.*}}) : (f32) -> f32 %0 = spv.GLSL.FAbs %arg0 : f32 - // CHECK: "llvm.intr.fabs"(%{{.*}}) : (!llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.fabs"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.FAbs %arg1 : vector<3xf16> spv.Return } @@ -60,7 +60,7 @@ spv.func @fabs(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @floor(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.floor"(%{{.*}}) : (f32) -> f32 %0 = spv.GLSL.Floor %arg0 : f32 - // CHECK: "llvm.intr.floor"(%{{.*}}) : (!llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.floor"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.Floor %arg1 : vector<3xf16> spv.Return } @@ -73,7 +73,7 @@ spv.func @floor(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @fmax(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %0 = spv.GLSL.FMax %arg0, %arg0 : f32 - // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (!llvm.vec<3 x f16>, !llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.FMax %arg1, %arg1 : vector<3xf16> spv.Return } @@ -86,7 +86,7 @@ spv.func @fmax(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @fmin(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %0 = spv.GLSL.FMin %arg0, %arg0 : f32 - // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (!llvm.vec<3 x f16>, !llvm.vec<3 x f16>) 
-> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.FMin %arg1, %arg1 : vector<3xf16> spv.Return } @@ -99,7 +99,7 @@ spv.func @fmin(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @log(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.log"(%{{.*}}) : (f32) -> f32 %0 = spv.GLSL.Log %arg0 : f32 - // CHECK: "llvm.intr.log"(%{{.*}}) : (!llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.log"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.Log %arg1 : vector<3xf16> spv.Return } @@ -112,7 +112,7 @@ spv.func @log(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @sin(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.sin"(%{{.*}}) : (f32) -> f32 %0 = spv.GLSL.Sin %arg0 : f32 - // CHECK: "llvm.intr.sin"(%{{.*}}) : (!llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.sin"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.Sin %arg1 : vector<3xf16> spv.Return } @@ -125,7 +125,7 @@ spv.func @sin(%arg0: f32, %arg1: vector<3xf16>) "None" { spv.func @smax(%arg0: i16, %arg1: vector<3xi32>) "None" { // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 %0 = spv.GLSL.SMax %arg0, %arg0 : i16 - // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (!llvm.vec<3 x i32>, !llvm.vec<3 x i32>) -> !llvm.vec<3 x i32> + // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> %1 = spv.GLSL.SMax %arg1, %arg1 : vector<3xi32> spv.Return } @@ -138,7 +138,7 @@ spv.func @smax(%arg0: i16, %arg1: vector<3xi32>) "None" { spv.func @smin(%arg0: i16, %arg1: vector<3xi32>) "None" { // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 %0 = spv.GLSL.SMin %arg0, %arg0 : i16 - // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (!llvm.vec<3 x i32>, !llvm.vec<3 x i32>) -> !llvm.vec<3 x i32> + // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> %1 = spv.GLSL.SMin 
%arg1, %arg1 : vector<3xi32> spv.Return } @@ -151,7 +151,7 @@ spv.func @smin(%arg0: i16, %arg1: vector<3xi32>) "None" { spv.func @sqrt(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 %0 = spv.GLSL.Sqrt %arg0 : f32 - // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (!llvm.vec<3 x f16>) -> !llvm.vec<3 x f16> + // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spv.GLSL.Sqrt %arg1 : vector<3xf16> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir index a61fb7316fb304..d052a3bf406cc5 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir @@ -13,7 +13,7 @@ spv.func @logical_equal_scalar(%arg0: i1, %arg1: i1) "None" { // CHECK-LABEL: @logical_equal_vector spv.func @logical_equal_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) "None" { - // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm.vec<4 x i1> + // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : vector<4xi1> %0 = spv.LogicalEqual %arg0, %arg0 : vector<4xi1> spv.Return } @@ -31,7 +31,7 @@ spv.func @logical_not_equal_scalar(%arg0: i1, %arg1: i1) "None" { // CHECK-LABEL: @logical_not_equal_vector spv.func @logical_not_equal_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) "None" { - // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm.vec<4 x i1> + // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : vector<4xi1> %0 = spv.LogicalNotEqual %arg0, %arg0 : vector<4xi1> spv.Return } @@ -50,8 +50,8 @@ spv.func @logical_not_scalar(%arg0: i1) "None" { // CHECK-LABEL: @logical_not_vector spv.func @logical_not_vector(%arg0: vector<4xi1>) "None" { - // CHECK: %[[CONST:.*]] = llvm.mlir.constant(dense : vector<4xi1>) : !llvm.vec<4 x i1> - // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm.vec<4 x i1> + // CHECK: %[[CONST:.*]] = llvm.mlir.constant(dense : vector<4xi1>) : vector<4xi1> + // CHECK: llvm.xor %{{.*}}, %[[CONST]] : 
vector<4xi1> %0 = spv.LogicalNot %arg0 : vector<4xi1> spv.Return } @@ -69,7 +69,7 @@ spv.func @logical_and_scalar(%arg0: i1, %arg1: i1) "None" { // CHECK-LABEL: @logical_and_vector spv.func @logical_and_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) "None" { - // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm.vec<4 x i1> + // CHECK: llvm.and %{{.*}}, %{{.*}} : vector<4xi1> %0 = spv.LogicalAnd %arg0, %arg0 : vector<4xi1> spv.Return } @@ -87,7 +87,7 @@ spv.func @logical_or_scalar(%arg0: i1, %arg1: i1) "None" { // CHECK-LABEL: @logical_or_vector spv.func @logical_or_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) "None" { - // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm.vec<4 x i1> + // CHECK: llvm.or %{{.*}}, %{{.*}} : vector<4xi1> %0 = spv.LogicalOr %arg0, %arg0 : vector<4xi1> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir index ccf8068320ea27..6aab710220e793 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir @@ -184,17 +184,17 @@ spv.func @variable_scalar_with_initialization() "None" { // CHECK-LABEL: @variable_vector spv.func @variable_vector() "None" { // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: llvm.alloca %[[SIZE]] x !llvm.vec<3 x f32> : (i32) -> !llvm.ptr> + // CHECK: llvm.alloca %[[SIZE]] x vector<3xf32> : (i32) -> !llvm.ptr> %0 = spv.Variable : !spv.ptr, Function> spv.Return } // CHECK-LABEL: @variable_vector_with_initialization spv.func @variable_vector_with_initialization() "None" { - // CHECK: %[[VALUE:.*]] = llvm.mlir.constant(dense : vector<3xi1>) : !llvm.vec<3 x i1> + // CHECK: %[[VALUE:.*]] = llvm.mlir.constant(dense : vector<3xi1>) : vector<3xi1> // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[ALLOCATED:.*]] = llvm.alloca %[[SIZE]] x !llvm.vec<3 x i1> : (i32) -> !llvm.ptr> - // CHECK: llvm.store %[[VALUE]], %[[ALLOCATED]] : 
!llvm.ptr> + // CHECK: %[[ALLOCATED:.*]] = llvm.alloca %[[SIZE]] x vector<3xi1> : (i32) -> !llvm.ptr> + // CHECK: llvm.store %[[VALUE]], %[[ALLOCATED]] : !llvm.ptr> %c = spv.constant dense : vector<3xi1> %0 = spv.Variable init(%c) : !spv.ptr, Function> spv.Return diff --git a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir index b95c3ff6b0036b..47263c079c8f3d 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir @@ -14,7 +14,7 @@ spv.func @composite_extract_array(%arg: !spv.array<4x!spv.array<4xf32>>) "None" // CHECK-LABEL: @composite_extract_vector spv.func @composite_extract_vector(%arg: vector<3xf32>) "None" { // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32 - // CHECK: llvm.extractelement %{{.*}}[%[[ZERO]] : i32] : !llvm.vec<3 x f32> + // CHECK: llvm.extractelement %{{.*}}[%[[ZERO]] : i32] : vector<3xf32> %0 = spv.CompositeExtract %arg[0 : i32] : vector<3xf32> spv.Return } @@ -33,7 +33,7 @@ spv.func @composite_insert_struct(%arg0: i32, %arg1: !spv.struct<(f32, !spv.arra // CHECK-LABEL: @composite_insert_vector spv.func @composite_insert_vector(%arg0: vector<3xf32>, %arg1: f32) "None" { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 - // CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%[[ONE]] : i32] : !llvm.vec<3 x f32> + // CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%[[ONE]] : i32] : vector<3xf32> %0 = spv.CompositeInsert %arg1, %arg0[1 : i32] : f32 into vector<3xf32> spv.Return } @@ -44,7 +44,7 @@ spv.func @composite_insert_vector(%arg0: vector<3xf32>, %arg1: f32) "None" { // CHECK-LABEL: @select_scalar spv.func @select_scalar(%arg0: i1, %arg1: vector<3xi32>, %arg2: f32) "None" { - // CHECK: llvm.select %{{.*}}, %{{.*}}, %{{.*}} : i1, !llvm.vec<3 x i32> + // CHECK: llvm.select %{{.*}}, %{{.*}}, %{{.*}} : i1, vector<3xi32> %0 = spv.Select %arg0, %arg1, %arg1 : i1, vector<3xi32> // CHECK: llvm.select %{{.*}}, 
%{{.*}}, %{{.*}} : i1, f32 %1 = spv.Select %arg0, %arg2, %arg2 : i1, f32 @@ -53,7 +53,7 @@ spv.func @select_scalar(%arg0: i1, %arg1: vector<3xi32>, %arg2: f32) "None" { // CHECK-LABEL: @select_vector spv.func @select_vector(%arg0: vector<2xi1>, %arg1: vector<2xi32>) "None" { - // CHECK: llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.vec<2 x i1>, !llvm.vec<2 x i32> + // CHECK: llvm.select %{{.*}}, %{{.*}}, %{{.*}} : vector<2xi1>, vector<2xi32> %0 = spv.Select %arg0, %arg1, %arg1 : vector<2xi1>, vector<2xi32> spv.Return } @@ -119,7 +119,7 @@ spv.func @undef_scalar() "None" { // CHECK-LABEL: @undef_vector spv.func @undef_vector() "None" { - // CHECK: llvm.mlir.undef : !llvm.vec<2 x i32> + // CHECK: llvm.mlir.undef : vector<2xi32> %0 = spv.undef : vector<2xi32> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/shift-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/shift-ops-to-llvm.mlir index c28328385dcfc1..9b76409dbd47c1 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/shift-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/shift-ops-to-llvm.mlir @@ -24,18 +24,18 @@ spv.func @shift_right_arithmetic_scalar(%arg0: i32, %arg1: si32, %arg2 : i16, %a // CHECK-LABEL: @shift_right_arithmetic_vector spv.func @shift_right_arithmetic_vector(%arg0: vector<4xi64>, %arg1: vector<4xui64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) "None" { - // CHECK: llvm.ashr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.ashr %{{.*}}, %{{.*}} : vector<4xi64> %0 = spv.ShiftRightArithmetic %arg0, %arg0 : vector<4xi64>, vector<4xi64> - // CHECK: llvm.ashr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.ashr %{{.*}}, %{{.*}} : vector<4xi64> %1 = spv.ShiftRightArithmetic %arg0, %arg1 : vector<4xi64>, vector<4xui64> - // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: llvm.ashr %{{.*}}, %[[SEXT]] : !llvm.vec<4 x i64> + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : vector<4xi32> to vector<4xi64> + // CHECK: llvm.ashr %{{.*}}, 
%[[SEXT]] : vector<4xi64> %2 = spv.ShiftRightArithmetic %arg0, %arg2 : vector<4xi64>, vector<4xi32> - // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: llvm.ashr %{{.*}}, %[[ZEXT]] : !llvm.vec<4 x i64> + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : vector<4xi32> to vector<4xi64> + // CHECK: llvm.ashr %{{.*}}, %[[ZEXT]] : vector<4xi64> %3 = spv.ShiftRightArithmetic %arg0, %arg3 : vector<4xi64>, vector<4xui32> spv.Return } @@ -64,18 +64,18 @@ spv.func @shift_right_logical_scalar(%arg0: i32, %arg1: si32, %arg2 : si16, %arg // CHECK-LABEL: @shift_right_logical_vector spv.func @shift_right_logical_vector(%arg0: vector<4xi64>, %arg1: vector<4xsi64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) "None" { - // CHECK: llvm.lshr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.lshr %{{.*}}, %{{.*}} : vector<4xi64> %0 = spv.ShiftRightLogical %arg0, %arg0 : vector<4xi64>, vector<4xi64> - // CHECK: llvm.lshr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.lshr %{{.*}}, %{{.*}} : vector<4xi64> %1 = spv.ShiftRightLogical %arg0, %arg1 : vector<4xi64>, vector<4xsi64> - // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: llvm.lshr %{{.*}}, %[[SEXT]] : !llvm.vec<4 x i64> + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : vector<4xi32> to vector<4xi64> + // CHECK: llvm.lshr %{{.*}}, %[[SEXT]] : vector<4xi64> %2 = spv.ShiftRightLogical %arg0, %arg2 : vector<4xi64>, vector<4xi32> - // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: llvm.lshr %{{.*}}, %[[ZEXT]] : !llvm.vec<4 x i64> + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : vector<4xi32> to vector<4xi64> + // CHECK: llvm.lshr %{{.*}}, %[[ZEXT]] : vector<4xi64> %3 = spv.ShiftRightLogical %arg0, %arg3 : vector<4xi64>, vector<4xui32> spv.Return } @@ -104,18 +104,18 @@ spv.func @shift_left_logical_scalar(%arg0: i32, %arg1: si32, %arg2 : i16, %arg3 // CHECK-LABEL: @shift_left_logical_vector 
spv.func @shift_left_logical_vector(%arg0: vector<4xi64>, %arg1: vector<4xsi64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) "None" { - // CHECK: llvm.shl %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.shl %{{.*}}, %{{.*}} : vector<4xi64> %0 = spv.ShiftLeftLogical %arg0, %arg0 : vector<4xi64>, vector<4xi64> - // CHECK: llvm.shl %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + // CHECK: llvm.shl %{{.*}}, %{{.*}} : vector<4xi64> %1 = spv.ShiftLeftLogical %arg0, %arg1 : vector<4xi64>, vector<4xsi64> - // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: llvm.shl %{{.*}}, %[[SEXT]] : !llvm.vec<4 x i64> + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : vector<4xi32> to vector<4xi64> + // CHECK: llvm.shl %{{.*}}, %[[SEXT]] : vector<4xi64> %2 = spv.ShiftLeftLogical %arg0, %arg2 : vector<4xi64>, vector<4xi32> - // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: llvm.shl %{{.*}}, %[[ZEXT]] : !llvm.vec<4 x i64> + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : vector<4xi32> to vector<4xi64> + // CHECK: llvm.shl %{{.*}}, %[[ZEXT]] : vector<4xi64> %3 = spv.ShiftLeftLogical %arg0, %arg3 : vector<4xi64>, vector<4xui32> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir index f65ee5dc86f505..1d573f3f250521 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir @@ -4,7 +4,7 @@ // Array type //===----------------------------------------------------------------------===// -// CHECK-LABEL: @array(!llvm.array<16 x f32>, !llvm.array<32 x vec<4 x f32>>) +// CHECK-LABEL: @array(!llvm.array<16 x f32>, !llvm.array<32 x vector<4xf32>>) spv.func @array(!spv.array<16 x f32>, !spv.array< 32 x vector<4xf32> >) "None" // CHECK-LABEL: @array_with_natural_stride(!llvm.array<16 x f32>) @@ -17,14 +17,14 @@ spv.func 
@array_with_natural_stride(!spv.array<16 x f32, stride=4>) "None" // CHECK-LABEL: @pointer_scalar(!llvm.ptr, !llvm.ptr) spv.func @pointer_scalar(!spv.ptr, !spv.ptr) "None" -// CHECK-LABEL: @pointer_vector(!llvm.ptr>) +// CHECK-LABEL: @pointer_vector(!llvm.ptr>) spv.func @pointer_vector(!spv.ptr, Function>) "None" //===----------------------------------------------------------------------===// // Runtime array type //===----------------------------------------------------------------------===// -// CHECK-LABEL: @runtime_array_vector(!llvm.array<0 x vec<4 x f32>>) +// CHECK-LABEL: @runtime_array_vector(!llvm.array<0 x vector<4xf32>>) spv.func @runtime_array_vector(!spv.rtarray< vector<4xf32> >) "None" // CHECK-LABEL: @runtime_array_scalar(!llvm.array<0 x f32>) diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir index 304d62c3935d09..0b334031aba997 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir @@ -487,35 +487,35 @@ func @multireturn_caller() { return } -// CHECK-LABEL: llvm.func @vector_ops(%arg0: !llvm.vec<4 x f32>, %arg1: !llvm.vec<4 x i1>, %arg2: !llvm.vec<4 x i64>, %arg3: !llvm.vec<4 x i64>) -> !llvm.vec<4 x f32> { +// CHECK-LABEL: llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> { func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> { -// CHECK-NEXT: %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : !llvm.vec<4 x f32> +// CHECK-NEXT: %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : vector<4xf32> %0 = constant dense<42.> : vector<4xf32> -// CHECK-NEXT: %1 = llvm.fadd %arg0, %0 : !llvm.vec<4 x f32> +// CHECK-NEXT: %1 = llvm.fadd %arg0, %0 : vector<4xf32> %1 = addf %arg0, %0 : vector<4xf32> -// CHECK-NEXT: %2 = llvm.sdiv %arg2, 
%arg2 : !llvm.vec<4 x i64> +// CHECK-NEXT: %2 = llvm.sdiv %arg2, %arg2 : vector<4xi64> %3 = divi_signed %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %3 = llvm.udiv %arg2, %arg2 : !llvm.vec<4 x i64> +// CHECK-NEXT: %3 = llvm.udiv %arg2, %arg2 : vector<4xi64> %4 = divi_unsigned %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %4 = llvm.srem %arg2, %arg2 : !llvm.vec<4 x i64> +// CHECK-NEXT: %4 = llvm.srem %arg2, %arg2 : vector<4xi64> %5 = remi_signed %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %5 = llvm.urem %arg2, %arg2 : !llvm.vec<4 x i64> +// CHECK-NEXT: %5 = llvm.urem %arg2, %arg2 : vector<4xi64> %6 = remi_unsigned %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %6 = llvm.fdiv %arg0, %0 : !llvm.vec<4 x f32> +// CHECK-NEXT: %6 = llvm.fdiv %arg0, %0 : vector<4xf32> %7 = divf %arg0, %0 : vector<4xf32> -// CHECK-NEXT: %7 = llvm.frem %arg0, %0 : !llvm.vec<4 x f32> +// CHECK-NEXT: %7 = llvm.frem %arg0, %0 : vector<4xf32> %8 = remf %arg0, %0 : vector<4xf32> -// CHECK-NEXT: %8 = llvm.and %arg2, %arg3 : !llvm.vec<4 x i64> +// CHECK-NEXT: %8 = llvm.and %arg2, %arg3 : vector<4xi64> %9 = and %arg2, %arg3 : vector<4xi64> -// CHECK-NEXT: %9 = llvm.or %arg2, %arg3 : !llvm.vec<4 x i64> +// CHECK-NEXT: %9 = llvm.or %arg2, %arg3 : vector<4xi64> %10 = or %arg2, %arg3 : vector<4xi64> -// CHECK-NEXT: %10 = llvm.xor %arg2, %arg3 : !llvm.vec<4 x i64> +// CHECK-NEXT: %10 = llvm.xor %arg2, %arg3 : vector<4xi64> %11 = xor %arg2, %arg3 : vector<4xi64> -// CHECK-NEXT: %11 = llvm.shl %arg2, %arg2 : !llvm.vec<4 x i64> +// CHECK-NEXT: %11 = llvm.shl %arg2, %arg2 : vector<4xi64> %12 = shift_left %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %12 = llvm.ashr %arg2, %arg2 : !llvm.vec<4 x i64> +// CHECK-NEXT: %12 = llvm.ashr %arg2, %arg2 : vector<4xi64> %13 = shift_right_signed %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %13 = llvm.lshr %arg2, %arg2 : !llvm.vec<4 x i64> +// CHECK-NEXT: %13 = llvm.lshr %arg2, %arg2 : vector<4xi64> %14 = shift_right_unsigned %arg2, %arg2 : vector<4xi64> return %1 : 
vector<4xf32> } @@ -597,17 +597,17 @@ func @sitofp(%arg0 : i32, %arg1 : i64) { // Checking conversion of integer vectors to floating point vector types. // CHECK-LABEL: @sitofp_vector func @sitofp_vector(%arg0 : vector<2xi16>, %arg1 : vector<2xi32>, %arg2 : vector<2xi64>) { -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i16> to !llvm.vec<2 x f32> +// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi16> to vector<2xf32> %0 = sitofp %arg0: vector<2xi16> to vector<2xf32> -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i16> to !llvm.vec<2 x f64> +// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi16> to vector<2xf64> %1 = sitofp %arg0: vector<2xi16> to vector<2xf64> -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i32> to !llvm.vec<2 x f32> +// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi32> to vector<2xf32> %2 = sitofp %arg1: vector<2xi32> to vector<2xf32> -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i32> to !llvm.vec<2 x f64> +// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi32> to vector<2xf64> %3 = sitofp %arg1: vector<2xi32> to vector<2xf64> -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i64> to !llvm.vec<2 x f32> +// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi64> to vector<2xf32> %4 = sitofp %arg2: vector<2xi64> to vector<2xf32> -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.vec<2 x i64> to !llvm.vec<2 x f64> +// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi64> to vector<2xf64> %5 = sitofp %arg2: vector<2xi64> to vector<2xf64> return } @@ -641,11 +641,11 @@ func @fpext(%arg0 : f16, %arg1 : f32) { // Checking conversion of integer types to floating point. 
// CHECK-LABEL: @fpext func @fpext_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>) { -// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.vec<2 x f16> to !llvm.vec<2 x f32> +// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf16> to vector<2xf32> %0 = fpext %arg0: vector<2xf16> to vector<2xf32> -// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.vec<2 x f16> to !llvm.vec<2 x f64> +// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf16> to vector<2xf64> %1 = fpext %arg0: vector<2xf16> to vector<2xf64> -// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x f64> +// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf32> to vector<2xf64> %2 = fpext %arg1: vector<2xf32> to vector<2xf64> return } @@ -667,17 +667,17 @@ func @fptosi(%arg0 : f32, %arg1 : f64) { // Checking conversion of floating point vectors to integer vector types. // CHECK-LABEL: @fptosi_vector func @fptosi_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector<2xf64>) { -// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x f16> to !llvm.vec<2 x i32> +// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf16> to vector<2xi32> %0 = fptosi %arg0: vector<2xf16> to vector<2xi32> -// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x f16> to !llvm.vec<2 x i64> +// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf16> to vector<2xi64> %1 = fptosi %arg0: vector<2xf16> to vector<2xi64> -// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x i32> +// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf32> to vector<2xi32> %2 = fptosi %arg1: vector<2xf32> to vector<2xi32> -// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x i64> +// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf32> to vector<2xi64> %3 = fptosi %arg1: vector<2xf32> to vector<2xi64> -// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 x f64> to !llvm.vec<2 x i32> +// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf64> to vector<2xi32> %4 = fptosi %arg2: vector<2xf64> to vector<2xi32> -// CHECK-NEXT: = llvm.fptosi {{.*}} : !llvm.vec<2 
x f64> to !llvm.vec<2 x i64> +// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf64> to vector<2xi64> %5 = fptosi %arg2: vector<2xf64> to vector<2xi64> return } @@ -699,17 +699,17 @@ func @fptoui(%arg0 : f32, %arg1 : f64) { // Checking conversion of floating point vectors to integer vector types. // CHECK-LABEL: @fptoui_vector func @fptoui_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector<2xf64>) { -// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x f16> to !llvm.vec<2 x i32> +// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf16> to vector<2xi32> %0 = fptoui %arg0: vector<2xf16> to vector<2xi32> -// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x f16> to !llvm.vec<2 x i64> +// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf16> to vector<2xi64> %1 = fptoui %arg0: vector<2xf16> to vector<2xi64> -// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x i32> +// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf32> to vector<2xi32> %2 = fptoui %arg1: vector<2xf32> to vector<2xi32> -// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x i64> +// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf32> to vector<2xi64> %3 = fptoui %arg1: vector<2xf32> to vector<2xi64> -// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x f64> to !llvm.vec<2 x i32> +// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf64> to vector<2xi32> %4 = fptoui %arg2: vector<2xf64> to vector<2xi32> -// CHECK-NEXT: = llvm.fptoui {{.*}} : !llvm.vec<2 x f64> to !llvm.vec<2 x i64> +// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf64> to vector<2xi64> %5 = fptoui %arg2: vector<2xf64> to vector<2xi64> return } @@ -717,17 +717,17 @@ func @fptoui_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector // Checking conversion of integer vectors to floating point vector types. 
// CHECK-LABEL: @uitofp_vector func @uitofp_vector(%arg0 : vector<2xi16>, %arg1 : vector<2xi32>, %arg2 : vector<2xi64>) { -// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i16> to !llvm.vec<2 x f32> +// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi16> to vector<2xf32> %0 = uitofp %arg0: vector<2xi16> to vector<2xf32> -// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i16> to !llvm.vec<2 x f64> +// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi16> to vector<2xf64> %1 = uitofp %arg0: vector<2xi16> to vector<2xf64> -// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i32> to !llvm.vec<2 x f32> +// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi32> to vector<2xf32> %2 = uitofp %arg1: vector<2xi32> to vector<2xf32> -// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i32> to !llvm.vec<2 x f64> +// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi32> to vector<2xf64> %3 = uitofp %arg1: vector<2xi32> to vector<2xf64> -// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i64> to !llvm.vec<2 x f32> +// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi64> to vector<2xf32> %4 = uitofp %arg2: vector<2xi64> to vector<2xf32> -// CHECK-NEXT: = llvm.uitofp {{.*}} : !llvm.vec<2 x i64> to !llvm.vec<2 x f64> +// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi64> to vector<2xf64> %5 = uitofp %arg2: vector<2xi64> to vector<2xf64> return } @@ -747,11 +747,11 @@ func @fptrunc(%arg0 : f32, %arg1 : f64) { // Checking conversion of integer types to floating point. 
// CHECK-LABEL: @fptrunc func @fptrunc_vector(%arg0 : vector<2xf32>, %arg1 : vector<2xf64>) { -// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.vec<2 x f32> to !llvm.vec<2 x f16> +// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf32> to vector<2xf16> %0 = fptrunc %arg0: vector<2xf32> to vector<2xf16> -// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.vec<2 x f64> to !llvm.vec<2 x f16> +// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf64> to vector<2xf16> %1 = fptrunc %arg1: vector<2xf64> to vector<2xf16> -// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.vec<2 x f64> to !llvm.vec<2 x f32> +// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf64> to vector<2xf32> %2 = fptrunc %arg1: vector<2xf64> to vector<2xf32> return } @@ -831,40 +831,40 @@ func @vec_bin(%arg0: vector<2x2x2xf32>) -> vector<2x2x2xf32> { %0 = addf %arg0, %arg0 : vector<2x2x2xf32> return %0 : vector<2x2x2xf32> -// CHECK-NEXT: llvm.mlir.undef : !llvm.array<2 x array<2 x vec<2 x f32>>> +// CHECK-NEXT: llvm.mlir.undef : !llvm.array<2 x array<2 x vector<2xf32>>> // This block appears 2x2 times -// CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vec<2 x f32>>> -// CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vec<2 x f32>>> -// CHECK-NEXT: llvm.fadd %{{.*}} : !llvm.vec<2 x f32> -// CHECK-NEXT: llvm.insertvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vec<2 x f32>>> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vector<2xf32>>> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vector<2xf32>>> +// CHECK-NEXT: llvm.fadd %{{.*}} : vector<2xf32> +// CHECK-NEXT: llvm.insertvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vector<2xf32>>> // We check the proper indexing of extract/insert in the remaining 3 positions. 
-// CHECK: llvm.extractvalue %{{.*}}[0, 1] : !llvm.array<2 x array<2 x vec<2 x f32>>> -// CHECK: llvm.insertvalue %{{.*}}[0, 1] : !llvm.array<2 x array<2 x vec<2 x f32>>> -// CHECK: llvm.extractvalue %{{.*}}[1, 0] : !llvm.array<2 x array<2 x vec<2 x f32>>> -// CHECK: llvm.insertvalue %{{.*}}[1, 0] : !llvm.array<2 x array<2 x vec<2 x f32>>> -// CHECK: llvm.extractvalue %{{.*}}[1, 1] : !llvm.array<2 x array<2 x vec<2 x f32>>> -// CHECK: llvm.insertvalue %{{.*}}[1, 1] : !llvm.array<2 x array<2 x vec<2 x f32>>> +// CHECK: llvm.extractvalue %{{.*}}[0, 1] : !llvm.array<2 x array<2 x vector<2xf32>>> +// CHECK: llvm.insertvalue %{{.*}}[0, 1] : !llvm.array<2 x array<2 x vector<2xf32>>> +// CHECK: llvm.extractvalue %{{.*}}[1, 0] : !llvm.array<2 x array<2 x vector<2xf32>>> +// CHECK: llvm.insertvalue %{{.*}}[1, 0] : !llvm.array<2 x array<2 x vector<2xf32>>> +// CHECK: llvm.extractvalue %{{.*}}[1, 1] : !llvm.array<2 x array<2 x vector<2xf32>>> +// CHECK: llvm.insertvalue %{{.*}}[1, 1] : !llvm.array<2 x array<2 x vector<2xf32>>> // And we're done // CHECK-NEXT: return } // CHECK-LABEL: @splat -// CHECK-SAME: %[[A:arg[0-9]+]]: !llvm.vec<4 x f32> +// CHECK-SAME: %[[A:arg[0-9]+]]: vector<4xf32> // CHECK-SAME: %[[ELT:arg[0-9]+]]: f32 func @splat(%a: vector<4xf32>, %b: f32) -> vector<4xf32> { %vb = splat %b : vector<4xf32> %r = mulf %a, %vb : vector<4xf32> return %r : vector<4xf32> } -// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : !llvm.vec<4 x f32> +// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<4xf32> // CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : !llvm.vec<4 x f32> +// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<4xf32> // CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] -// CHECK-NEXT: %[[SCALE:[0-9]+]] = llvm.fmul %[[A]], %[[SPLAT]] : 
!llvm.vec<4 x f32> -// CHECK-NEXT: llvm.return %[[SCALE]] : !llvm.vec<4 x f32> +// CHECK-NEXT: %[[SCALE:[0-9]+]] = llvm.fmul %[[A]], %[[SPLAT]] : vector<4xf32> +// CHECK-NEXT: llvm.return %[[SCALE]] : vector<4xf32> // CHECK-LABEL: func @view( // CHECK: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: i64 @@ -1357,24 +1357,6 @@ func @assume_alignment(%0 : memref<4x4xf16>) { // ----- -// CHECK-LABEL: func @mlir_cast_to_llvm -// CHECK-SAME: %[[ARG:.*]]: -func @mlir_cast_to_llvm(%0 : vector<2xf16>) -> !llvm.vec<2 x f16> { - %1 = llvm.mlir.cast %0 : vector<2xf16> to !llvm.vec<2 x f16> - // CHECK-NEXT: llvm.return %[[ARG]] - return %1 : !llvm.vec<2 x f16> -} - -// CHECK-LABEL: func @mlir_cast_from_llvm -// CHECK-SAME: %[[ARG:.*]]: -func @mlir_cast_from_llvm(%0 : !llvm.vec<2 x f16>) -> vector<2xf16> { - %1 = llvm.mlir.cast %0 : !llvm.vec<2 x f16> to vector<2xf16> - // CHECK-NEXT: llvm.return %[[ARG]] - return %1 : vector<2xf16> -} - -// ----- - // CHECK-LABEL: func @memref_index // CHECK-SAME: %arg0: !llvm.ptr, %arg1: !llvm.ptr, // CHECK-SAME: %arg2: i64, %arg3: i64, %arg4: i64) diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir index 56126f603c2731..72c42de3a47e6b 100644 --- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir +++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir @@ -68,11 +68,11 @@ func @rsqrt_double(%arg0 : f64) { // ----- // CHECK-LABEL: func @rsqrt_vector( -// CHECK-SAME: !llvm.vec<4 x f32> +// CHECK-SAME: vector<4xf32> func @rsqrt_vector(%arg0 : vector<4xf32>) { - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm.vec<4 x f32> - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (!llvm.vec<4 x f32>) -> !llvm.vec<4 x f32> - // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : !llvm.vec<4 x f32> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + // CHECK: 
%[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<4xf32> %0 = rsqrt %arg0 : vector<4xf32> std.return } @@ -80,13 +80,13 @@ func @rsqrt_vector(%arg0 : vector<4xf32>) { // ----- // CHECK-LABEL: func @rsqrt_multidim_vector( -// CHECK-SAME: !llvm.array<4 x vec<3 x f32>> +// CHECK-SAME: !llvm.array<4 x vector<3xf32>> func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) { - // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %arg0[0] : !llvm.array<4 x vec<3 x f32>> - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : !llvm.vec<3 x f32> - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%[[EXTRACT]]) : (!llvm.vec<3 x f32>) -> !llvm.vec<3 x f32> - // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : !llvm.vec<3 x f32> - // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[DIV]], %0[0] : !llvm.array<4 x vec<3 x f32>> + // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %arg0[0] : !llvm.array<4 x vector<3xf32>> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : vector<3xf32> + // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%[[EXTRACT]]) : (vector<3xf32>) -> vector<3xf32> + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<3xf32> + // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[DIV]], %0[0] : !llvm.array<4 x vector<3xf32>> %0 = rsqrt %arg0 : vector<4x3xf32> std.return } diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir index d15499ec387190..85e19da8401351 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir @@ -3,24 +3,24 @@ // CMP32-LABEL: llvm.func @genbool_var_1d( // CMP32-SAME: %[[A:.*]]: i64) -// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : !llvm.vec<11 x i32> +// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : vector<11xi32> // CMP32: %[[T1:.*]] = llvm.trunc %[[A]] : i64 to i32 -// CMP32: %[[T2:.*]] = llvm.mlir.undef : !llvm.vec<11 x i32> +// CMP32: %[[T2:.*]] = llvm.mlir.undef : vector<11xi32> // CMP32: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : i32] : !llvm.vec<11 x i32> -// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<11 x i32>, !llvm.vec<11 x i32> -// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : !llvm.vec<11 x i32> -// CMP32: llvm.return %[[T6]] : !llvm.vec<11 x i1> +// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : i32] : vector<11xi32> +// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi32>, vector<11xi32> +// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : vector<11xi32> +// CMP32: llvm.return %[[T6]] : vector<11xi1> // CMP64-LABEL: llvm.func @genbool_var_1d( // CMP64-SAME: %[[A:.*]]: i64) -// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : !llvm.vec<11 x i64> -// CMP64: %[[T1:.*]] = llvm.mlir.undef : !llvm.vec<11 x i64> +// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : vector<11xi64> +// CMP64: %[[T1:.*]] = llvm.mlir.undef : vector<11xi64> // CMP64: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : !llvm.vec<11 x i64> -// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<11 x i64>, !llvm.vec<11 x i64> -// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : !llvm.vec<11 x 
i64> -// CMP64: llvm.return %[[T5]] : !llvm.vec<11 x i1> +// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<11xi64> +// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi64>, vector<11xi64> +// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : vector<11xi64> +// CMP64: llvm.return %[[T5]] : vector<11xi1> func @genbool_var_1d(%arg0: index) -> vector<11xi1> { %0 = vector.create_mask %arg0 : vector<11xi1> @@ -28,18 +28,18 @@ func @genbool_var_1d(%arg0: index) -> vector<11xi1> { } // CMP32-LABEL: llvm.func @transfer_read_1d -// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : !llvm.vec<16 x i32> -// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : !llvm.vec<16 x i32> -// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : !llvm.vec<16 x i32> +// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : vector<16xi32> +// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi32> +// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi32> // CMP32: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}} -// CMP32: llvm.return %[[L]] : !llvm.vec<16 x f32> +// CMP32: llvm.return %[[L]] : vector<16xf32> // CMP64-LABEL: llvm.func @transfer_read_1d -// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>) : !llvm.vec<16 x i64> -// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : !llvm.vec<16 x i64> -// CMP64: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : !llvm.vec<16 x i64> +// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>) : vector<16xi64> +// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi64> +// CMP64: %[[M:.*]] = llvm.icmp "slt" 
%[[A]], %{{.*}} : vector<16xi64> // CMP64: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}} -// CMP64: llvm.return %[[L]] : !llvm.vec<16 x f32> +// CMP64: llvm.return %[[L]] : vector<16xf32> func @transfer_read_1d(%A : memref, %i: index) -> vector<16xf32> { %d = constant -1.0: f32 diff --git a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir index e35f6f8f0a4bda..71d091413b2ea0 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir @@ -3,17 +3,17 @@ // // CHECK-LABEL: llvm.func @reduce_add_f32( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f32>) +// CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) -// CHECK-SAME: {reassoc = false} : (f32, !llvm.vec<16 x f32>) -> f32 +// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32 // CHECK: llvm.return %[[V]] : f32 // // REASSOC-LABEL: llvm.func @reduce_add_f32( -// REASSOC-SAME: %[[A:.*]]: !llvm.vec<16 x f32>) +// REASSOC-SAME: %[[A:.*]]: vector<16xf32>) // REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 // REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) -// REASSOC-SAME: {reassoc = true} : (f32, !llvm.vec<16 x f32>) -> f32 +// REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32 // REASSOC: llvm.return %[[V]] : f32 // func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 { @@ -23,17 +23,17 @@ func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 { // // CHECK-LABEL: llvm.func @reduce_mul_f32( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f32>) +// CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]]) -// CHECK-SAME: {reassoc = false} : (f32, !llvm.vec<16 x 
f32>) -> f32 +// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32 // CHECK: llvm.return %[[V]] : f32 // // REASSOC-LABEL: llvm.func @reduce_mul_f32( -// REASSOC-SAME: %[[A:.*]]: !llvm.vec<16 x f32>) +// REASSOC-SAME: %[[A:.*]]: vector<16xf32>) // REASSOC: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]]) -// REASSOC-SAME: {reassoc = true} : (f32, !llvm.vec<16 x f32>) -> f32 +// REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32 // REASSOC: llvm.return %[[V]] : f32 // func @reduce_mul_f32(%arg0: vector<16xf32>) -> f32 { diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index ef4a85c1652c72..cfaaed48e0394b 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -6,11 +6,11 @@ func @broadcast_vec1d_from_scalar(%arg0: f32) -> vector<2xf32> { } // CHECK-LABEL: llvm.func @broadcast_vec1d_from_scalar( // CHECK-SAME: %[[A:.*]]: f32) -// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.vec<2 x f32> +// CHECK: %[[T0:.*]] = llvm.mlir.undef : vector<2xf32> // CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T2:.*]] = llvm.insertelement %[[A]], %[[T0]][%[[T1]] : i32] : !llvm.vec<2 x f32> -// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T0]] [0 : i32, 0 : i32] : !llvm.vec<2 x f32>, !llvm.vec<2 x f32> -// CHECK: llvm.return %[[T3]] : !llvm.vec<2 x f32> +// CHECK: %[[T2:.*]] = llvm.insertelement %[[A]], %[[T0]][%[[T1]] : i32] : vector<2xf32> +// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T0]] [0 : i32, 0 : i32] : vector<2xf32>, vector<2xf32> +// CHECK: llvm.return %[[T3]] : vector<2xf32> func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32> @@ -18,14 +18,14 @@ func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> { } // CHECK-LABEL: 
llvm.func @broadcast_vec2d_from_scalar( // CHECK-SAME: %[[A:.*]]: f32) -// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vec<3 x f32>> -// CHECK: %[[T1:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T1:.*]] = llvm.mlir.undef : vector<3xf32> // CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0] : !llvm.array<2 x vec<3 x f32>> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<2 x vec<3 x f32>> -// CHECK: llvm.return %[[T6]] : !llvm.array<2 x vec<3 x f32>> +// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<3xf32> +// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<2 x vector<3xf32>> +// CHECK: llvm.return %[[T6]] : !llvm.array<2 x vector<3xf32>> func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32> @@ -33,277 +33,277 @@ func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> { } // CHECK-LABEL: llvm.func @broadcast_vec3d_from_scalar( // CHECK-SAME: %[[A:.*]]: f32) -// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x array<3 x vec<4 x f32>>> -// CHECK: %[[T1:.*]] = llvm.mlir.undef : !llvm.vec<4 x f32> +// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x array<3 x vector<4xf32>>> +// CHECK: %[[T1:.*]] = llvm.mlir.undef : vector<4xf32> // CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T3:.*]] = 
llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : !llvm.vec<4 x f32> -// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0, 0] : !llvm.array<2 x array<3 x vec<4 x f32>>> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0, 1] : !llvm.array<2 x array<3 x vec<4 x f32>>> -// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][0, 2] : !llvm.array<2 x array<3 x vec<4 x f32>>> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][1, 0] : !llvm.array<2 x array<3 x vec<4 x f32>>> -// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T4]], %[[T8]][1, 1] : !llvm.array<2 x array<3 x vec<4 x f32>>> -// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T4]], %[[T9]][1, 2] : !llvm.array<2 x array<3 x vec<4 x f32>>> -// CHECK: llvm.return %[[T10]] : !llvm.array<2 x array<3 x vec<4 x f32>>> +// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<4xf32> +// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<4xf32>, vector<4xf32> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0, 0] : !llvm.array<2 x array<3 x vector<4xf32>>> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0, 1] : !llvm.array<2 x array<3 x vector<4xf32>>> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][0, 2] : !llvm.array<2 x array<3 x vector<4xf32>>> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][1, 0] : !llvm.array<2 x array<3 x vector<4xf32>>> +// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T4]], %[[T8]][1, 1] : !llvm.array<2 x array<3 x vector<4xf32>>> +// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T4]], %[[T9]][1, 2] : !llvm.array<2 x array<3 x vector<4xf32>>> +// CHECK: llvm.return %[[T10]] : !llvm.array<2 x array<3 x vector<4xf32>>> func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to 
vector<2xf32> return %0 : vector<2xf32> } // CHECK-LABEL: llvm.func @broadcast_vec1d_from_vec1d( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x f32>) -// CHECK: llvm.return %[[A]] : !llvm.vec<2 x f32> +// CHECK-SAME: %[[A:.*]]: vector<2xf32>) +// CHECK: llvm.return %[[A]] : vector<2xf32> func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32> return %0 : vector<3x2xf32> } // CHECK-LABEL: llvm.func @broadcast_vec2d_from_vec1d( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x f32>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<3 x vec<2 x f32>> -// CHECK: llvm.return %[[T3]] : !llvm.array<3 x vec<2 x f32>> +// CHECK-SAME: %[[A:.*]]: vector<2xf32>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: llvm.return %[[T3]] : !llvm.array<3 x vector<2xf32>> func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } // CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec1d( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x f32>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x 
array<3 x vec<2 x f32>>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T1]][0] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][2] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][3] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: llvm.return %[[T8]] : !llvm.array<4 x array<3 x vec<2 x f32>>> +// CHECK-SAME: %[[A:.*]]: vector<2xf32>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T1]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: llvm.return %[[T8]] : !llvm.array<4 x array<3 x vector<2xf32>>> func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32> 
return %0 : vector<4x3x2xf32> } // CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec2d( -// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vec<2 x f32>>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][3] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: llvm.return %[[T4]] : !llvm.array<4 x array<3 x vec<2 x f32>>> +// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vector<2xf32>>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: llvm.return %[[T4]] : !llvm.array<4 x array<3 x vector<2xf32>>> func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> { %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32> return %0 : vector<4xf32> } // CHECK-LABEL: llvm.func @broadcast_stretch( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<1 x f32>) +// CHECK-SAME: %[[A:.*]]: vector<1xf32>) // CHECK: %[[T0:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : i64] : !llvm.vec<1 x f32> -// CHECK: %[[T2:.*]] = llvm.mlir.undef : !llvm.vec<4 x f32> +// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : i64] : 
vector<1xf32> +// CHECK: %[[T2:.*]] = llvm.mlir.undef : vector<4xf32> // CHECK: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%3 : i32] : !llvm.vec<4 x f32> -// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32> -// CHECK: llvm.return %[[T5]] : !llvm.vec<4 x f32> +// CHECK: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%3 : i32] : vector<4xf32> +// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<4xf32>, vector<4xf32> +// CHECK: llvm.return %[[T5]] : vector<4xf32> func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> { %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32> return %0 : vector<3x4xf32> } // CHECK-LABEL: llvm.func @broadcast_stretch_at_start( -// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vec<4 x f32>>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x4xf32>) : !llvm.array<3 x vec<4 x f32>> -// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vec<4 x f32>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<3 x vec<4 x f32>> -// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][1] : !llvm.array<3 x vec<4 x f32>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T1]], %[[T3]][2] : !llvm.array<3 x vec<4 x f32>> -// CHECK: llvm.return %[[T4]] : !llvm.array<3 x vec<4 x f32>> +// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vector<4xf32>>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x4xf32>) : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vector<4xf32>> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][1] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T1]], 
%[[T3]][2] : !llvm.array<3 x vector<4xf32>> +// CHECK: llvm.return %[[T4]] : !llvm.array<3 x vector<4xf32>> func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> { %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32> return %0 : vector<4x3xf32> } // CHECK-LABEL: llvm.func @broadcast_stretch_at_end( -// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vec<1 x f32>>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3xf32>) : !llvm.array<4 x vec<3 x f32>> -// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<4 x vec<1 x f32>> +// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<1xf32>>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3xf32>) : !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<4 x vector<1xf32>> // CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]][%[[T2]] : i64] : !llvm.vec<1 x f32> -// CHECK: %[[T4:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]][%[[T2]] : i64] : vector<1xf32> +// CHECK: %[[T4:.*]] = llvm.mlir.undef : vector<3xf32> // CHECK: %[[T5:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T6:.*]] = llvm.insertelement %[[T3]], %[[T4]][%[[T5]] : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T7:.*]] = llvm.shufflevector %[[T6]], %[[T4]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<4 x vec<3 x f32>> -// CHECK: %[[T9:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<4 x vec<1 x f32>> +// CHECK: %[[T6:.*]] = llvm.insertelement %[[T3]], %[[T4]][%[[T5]] : i32] : vector<3xf32> +// CHECK: %[[T7:.*]] = llvm.shufflevector %[[T6]], %[[T4]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T9:.*]] = llvm.extractvalue 
%[[A]][1] : !llvm.array<4 x vector<1xf32>> // CHECK: %[[T10:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T11:.*]] = llvm.extractelement %[[T9]][%[[T10]] : i64] : !llvm.vec<1 x f32> -// CHECK: %[[T12:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T11:.*]] = llvm.extractelement %[[T9]][%[[T10]] : i64] : vector<1xf32> +// CHECK: %[[T12:.*]] = llvm.mlir.undef : vector<3xf32> // CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<4 x vec<3 x f32>> -// CHECK: %[[T17:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vec<1 x f32>> +// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : i32] : vector<3xf32> +// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T17:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<1xf32>> // CHECK: %[[T18:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T19:.*]] = llvm.extractelement %[[T17]][%[[T18]] : i64] : !llvm.vec<1 x f32> -// CHECK: %[[T20:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T19:.*]] = llvm.extractelement %[[T17]][%[[T18]] : i64] : vector<1xf32> +// CHECK: %[[T20:.*]] = llvm.mlir.undef : vector<3xf32> // CHECK: %[[T21:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T22:.*]] = llvm.insertelement %[[T19]], %[[T20]][%[[T21]] : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T23:.*]] = llvm.shufflevector %[[T22]], %[[T20]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T16]][2] : !llvm.array<4 x vec<3 x 
f32>> -// CHECK: %[[T25:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vec<1 x f32>> +// CHECK: %[[T22:.*]] = llvm.insertelement %[[T19]], %[[T20]][%[[T21]] : i32] : vector<3xf32> +// CHECK: %[[T23:.*]] = llvm.shufflevector %[[T22]], %[[T20]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T16]][2] : !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T25:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<1xf32>> // CHECK: %[[T26:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T27:.*]] = llvm.extractelement %[[T25]][%[[T26]] : i64] : !llvm.vec<1 x f32> -// CHECK: %[[T28:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T27:.*]] = llvm.extractelement %[[T25]][%[[T26]] : i64] : vector<1xf32> +// CHECK: %[[T28:.*]] = llvm.mlir.undef : vector<3xf32> // CHECK: %[[T29:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T30:.*]] = llvm.insertelement %[[T27]], %[[T28]][%[[T29]] : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T31:.*]] = llvm.shufflevector %[[T30]], %[[T28]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T32:.*]] = llvm.insertvalue %[[T31]], %[[T24]][3] : !llvm.array<4 x vec<3 x f32>> -// CHECK: llvm.return %[[T32]] : !llvm.array<4 x vec<3 x f32>> +// CHECK: %[[T30:.*]] = llvm.insertelement %[[T27]], %[[T28]][%[[T29]] : i32] : vector<3xf32> +// CHECK: %[[T31:.*]] = llvm.shufflevector %[[T30]], %[[T28]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T32:.*]] = llvm.insertvalue %[[T31]], %[[T24]][3] : !llvm.array<4 x vector<3xf32>> +// CHECK: llvm.return %[[T32]] : !llvm.array<4 x vector<3xf32>> func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } // CHECK-LABEL: llvm.func @broadcast_stretch_in_middle( -// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x array<1 x vec<2 x f32>>>) -// CHECK: 
%[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][0, 0] : !llvm.array<4 x array<1 x vec<2 x f32>>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T2]], %[[T1]][0] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T2]], %[[T4]][1] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T2]], %[[T5]][2] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T0]][0] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T8:.*]] = llvm.extractvalue %[[A]][1, 0] : !llvm.array<4 x array<1 x vec<2 x f32>>> -// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T1]][0] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T8]], %[[T10]][1] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T12:.*]] = llvm.insertvalue %[[T8]], %[[T11]][2] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T7]][1] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T14:.*]] = llvm.extractvalue %[[A]][2, 0] : !llvm.array<4 x array<1 x vec<2 x f32>>> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T14]], %[[T1]][0] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T14]], %[[T16]][1] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T14]], %[[T17]][2] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T19:.*]] = llvm.insertvalue %[[T18]], %[[T13]][2] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: %[[T20:.*]] = llvm.extractvalue %[[A]][3, 0] : !llvm.array<4 x array<1 x vec<2 x f32>>> -// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T20]], %[[T1]][0] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T20]], %[[T22]][1] : !llvm.array<3 x vec<2 x f32>> -// CHECK: 
%[[T24:.*]] = llvm.insertvalue %[[T20]], %[[T23]][2] : !llvm.array<3 x vec<2 x f32>> -// CHECK: %[[T25:.*]] = llvm.insertvalue %[[T24]], %[[T19]][3] : !llvm.array<4 x array<3 x vec<2 x f32>>> -// CHECK: llvm.return %[[T25]] : !llvm.array<4 x array<3 x vec<2 x f32>>> +// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x array<1 x vector<2xf32>>>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][0, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T2]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T2]], %[[T4]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T2]], %[[T5]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T0]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T8:.*]] = llvm.extractvalue %[[A]][1, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T8]], %[[T10]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T12:.*]] = llvm.insertvalue %[[T8]], %[[T11]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T14:.*]] = llvm.extractvalue %[[A]][2, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T14]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T14]], %[[T16]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T14]], %[[T17]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T19:.*]] = llvm.insertvalue 
%[[T18]], %[[T13]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T20:.*]] = llvm.extractvalue %[[A]][3, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T20]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T20]], %[[T22]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T20]], %[[T23]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T25:.*]] = llvm.insertvalue %[[T24]], %[[T19]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: llvm.return %[[T25]] : !llvm.array<4 x array<3 x vector<2xf32>>> func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32> { %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> } // CHECK-LABEL: llvm.func @outerproduct( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x f32>, -// CHECK-SAME: %[[B:.*]]: !llvm.vec<3 x f32>) +// CHECK-SAME: %[[A:.*]]: vector<2xf32>, +// CHECK-SAME: %[[B:.*]]: vector<3xf32>) // CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x3xf32>) // CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : i64] : !llvm.vec<2 x f32> -// CHECK: %[[T3:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : i64] : vector<2xf32> +// CHECK: %[[T3:.*]] = llvm.mlir.undef : vector<3xf32> // CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%4 : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T7:.*]] = llvm.fmul %[[T6]], %[[B]] : !llvm.vec<3 x f32> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<2 x vec<3 x f32>> +// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%4 : i32] : vector<3xf32> +// 
CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T7:.*]] = llvm.fmul %[[T6]], %[[B]] : vector<3xf32> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>> // CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]][%9 : i64] : !llvm.vec<2 x f32> -// CHECK: %[[T11:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]][%9 : i64] : vector<2xf32> +// CHECK: %[[T11:.*]] = llvm.mlir.undef : vector<3xf32> // CHECK: %[[T12:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T13:.*]] = llvm.insertelement %[[T10]], %[[T11]][%12 : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T14:.*]] = llvm.shufflevector %[[T13]], %[[T11]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T15:.*]] = llvm.fmul %[[T14]], %[[B]] : !llvm.vec<3 x f32> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<2 x vec<3 x f32>> -// CHECK: llvm.return %[[T16]] : !llvm.array<2 x vec<3 x f32>> +// CHECK: %[[T13:.*]] = llvm.insertelement %[[T10]], %[[T11]][%12 : i32] : vector<3xf32> +// CHECK: %[[T14:.*]] = llvm.shufflevector %[[T13]], %[[T11]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T15:.*]] = llvm.fmul %[[T14]], %[[B]] : vector<3xf32> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<2 x vector<3xf32>> +// CHECK: llvm.return %[[T16]] : !llvm.array<2 x vector<3xf32>> func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> { %2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> } // CHECK-LABEL: llvm.func @outerproduct_add( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x f32>, -// CHECK-SAME: %[[B:.*]]: !llvm.vec<3 x f32>, -// CHECK-SAME: %[[C:.*]]: !llvm.array<2 x vec<3 x f32>>) +// 
CHECK-SAME: %[[A:.*]]: vector<2xf32>, +// CHECK-SAME: %[[B:.*]]: vector<3xf32>, +// CHECK-SAME: %[[C:.*]]: !llvm.array<2 x vector<3xf32>>) // CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x3xf32>) // CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : i64] : !llvm.vec<2 x f32> -// CHECK: %[[T3:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : i64] : vector<2xf32> +// CHECK: %[[T3:.*]] = llvm.mlir.undef : vector<3xf32> // CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T7:.*]] = llvm.extractvalue %[[C]][0] : !llvm.array<2 x vec<3 x f32>> -// CHECK: %[[T8:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T7]]) : (!llvm.vec<3 x f32>, !llvm.vec<3 x f32>, !llvm.vec<3 x f32>) -// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T0]][0] : !llvm.array<2 x vec<3 x f32>> +// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : i32] : vector<3xf32> +// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T7:.*]] = llvm.extractvalue %[[C]][0] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T8:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T7]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) +// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>> // CHECK: %[[T10:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[T11:.*]] = llvm.extractelement %[[A]][%[[T10]] : i64] : !llvm.vec<2 x f32> -// CHECK: %[[T12:.*]] = llvm.mlir.undef : !llvm.vec<3 x f32> +// CHECK: %[[T11:.*]] = llvm.extractelement %[[A]][%[[T10]] : i64] : vector<2xf32> +// CHECK: %[[T12:.*]] = llvm.mlir.undef : 
vector<3xf32> // CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : i32] : !llvm.vec<3 x f32> -// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T16:.*]] = llvm.extractvalue %[[C]][1] : !llvm.array<2 x vec<3 x f32>> -// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T15]], %[[B]], %[[T16]]) : (!llvm.vec<3 x f32>, !llvm.vec<3 x f32>, !llvm.vec<3 x f32>) -// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T9]][1] : !llvm.array<2 x vec<3 x f32>> -// CHECK: llvm.return %[[T18]] : !llvm.array<2 x vec<3 x f32>> +// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : i32] : vector<3xf32> +// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T16:.*]] = llvm.extractvalue %[[C]][1] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T15]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) +// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T9]][1] : !llvm.array<2 x vector<3xf32>> +// CHECK: llvm.return %[[T18]] : !llvm.array<2 x vector<3xf32>> func @shuffle_1D_direct(%arg0: vector<2xf32>, %arg1: vector<2xf32>) -> vector<2xf32> { %1 = vector.shuffle %arg0, %arg1 [0, 1] : vector<2xf32>, vector<2xf32> return %1 : vector<2xf32> } // CHECK-LABEL: llvm.func @shuffle_1D_direct( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x f32>, -// CHECK-SAME: %[[B:.*]]: !llvm.vec<2 x f32>) -// CHECK: %[[s:.*]] = llvm.shufflevector %[[A]], %[[B]] [0, 1] : !llvm.vec<2 x f32>, !llvm.vec<2 x f32> -// CHECK: llvm.return %[[s]] : !llvm.vec<2 x f32> +// CHECK-SAME: %[[A:.*]]: vector<2xf32>, +// CHECK-SAME: %[[B:.*]]: vector<2xf32>) +// CHECK: %[[s:.*]] = llvm.shufflevector %[[A]], %[[B]] [0, 1] : vector<2xf32>, vector<2xf32> +// CHECK: llvm.return %[[s]] : vector<2xf32> func @shuffle_1D(%arg0: 
vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> { %1 = vector.shuffle %arg0, %arg1 [4, 3, 2, 1, 0] : vector<2xf32>, vector<3xf32> return %1 : vector<5xf32> } // CHECK-LABEL: llvm.func @shuffle_1D( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x f32>, -// CHECK-SAME: %[[B:.*]]: !llvm.vec<3 x f32>) -// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm.vec<5 x f32> +// CHECK-SAME: %[[A:.*]]: vector<2xf32>, +// CHECK-SAME: %[[B:.*]]: vector<3xf32>) +// CHECK: %[[u0:.*]] = llvm.mlir.undef : vector<5xf32> // CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[e1:.*]] = llvm.extractelement %[[B]][%[[c2]] : i64] : !llvm.vec<3 x f32> +// CHECK: %[[e1:.*]] = llvm.extractelement %[[B]][%[[c2]] : i64] : vector<3xf32> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[i1:.*]] = llvm.insertelement %[[e1]], %[[u0]][%[[c0]] : i64] : !llvm.vec<5 x f32> +// CHECK: %[[i1:.*]] = llvm.insertelement %[[e1]], %[[u0]][%[[c0]] : i64] : vector<5xf32> // CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[e2:.*]] = llvm.extractelement %[[B]][%[[c1]] : i64] : !llvm.vec<3 x f32> +// CHECK: %[[e2:.*]] = llvm.extractelement %[[B]][%[[c1]] : i64] : vector<3xf32> // CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[i2:.*]] = llvm.insertelement %[[e2]], %[[i1]][%[[c1]] : i64] : !llvm.vec<5 x f32> +// CHECK: %[[i2:.*]] = llvm.insertelement %[[e2]], %[[i1]][%[[c1]] : i64] : vector<5xf32> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[e3:.*]] = llvm.extractelement %[[B]][%[[c0]] : i64] : !llvm.vec<3 x f32> +// CHECK: %[[e3:.*]] = llvm.extractelement %[[B]][%[[c0]] : i64] : vector<3xf32> // CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[i3:.*]] = llvm.insertelement %[[e3]], %[[i2]][%[[c2]] : i64] : !llvm.vec<5 x f32> +// CHECK: %[[i3:.*]] = llvm.insertelement %[[e3]], %[[i2]][%[[c2]] : i64] : vector<5xf32> // CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: 
%[[e4:.*]] = llvm.extractelement %[[A]][%[[c1]] : i64] : !llvm.vec<2 x f32> +// CHECK: %[[e4:.*]] = llvm.extractelement %[[A]][%[[c1]] : i64] : vector<2xf32> // CHECK: %[[c3:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[i4:.*]] = llvm.insertelement %[[e4]], %[[i3]][%[[c3]] : i64] : !llvm.vec<5 x f32> +// CHECK: %[[i4:.*]] = llvm.insertelement %[[e4]], %[[i3]][%[[c3]] : i64] : vector<5xf32> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[e5:.*]] = llvm.extractelement %[[A]][%[[c0]] : i64] : !llvm.vec<2 x f32> +// CHECK: %[[e5:.*]] = llvm.extractelement %[[A]][%[[c0]] : i64] : vector<2xf32> // CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64 -// CHECK: %[[i5:.*]] = llvm.insertelement %[[e5]], %[[i4]][%[[c4]] : i64] : !llvm.vec<5 x f32> -// CHECK: llvm.return %[[i5]] : !llvm.vec<5 x f32> +// CHECK: %[[i5:.*]] = llvm.insertelement %[[e5]], %[[i4]][%[[c4]] : i64] : vector<5xf32> +// CHECK: llvm.return %[[i5]] : vector<5xf32> func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf32> { %1 = vector.shuffle %a, %b[1, 0, 2] : vector<1x4xf32>, vector<2x4xf32> return %1 : vector<3x4xf32> } // CHECK-LABEL: llvm.func @shuffle_2D( -// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vec<4 x f32>>, -// CHECK-SAME: %[[B:.*]]: !llvm.array<2 x vec<4 x f32>>) -// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm.array<3 x vec<4 x f32>> -// CHECK: %[[e1:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vec<4 x f32>> -// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e1]], %[[u0]][0] : !llvm.array<3 x vec<4 x f32>> -// CHECK: %[[e2:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vec<4 x f32>> -// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e2]], %[[i1]][1] : !llvm.array<3 x vec<4 x f32>> -// CHECK: %[[e3:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vec<4 x f32>> -// CHECK: %[[i3:.*]] = llvm.insertvalue %[[e3]], %[[i2]][2] : !llvm.array<3 x vec<4 x f32>> -// CHECK: llvm.return %[[i3]] : !llvm.array<3 x vec<4 x f32>> +// CHECK-SAME: 
%[[A:.*]]: !llvm.array<1 x vector<4xf32>>, +// CHECK-SAME: %[[B:.*]]: !llvm.array<2 x vector<4xf32>>) +// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[e1:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e1]], %[[u0]][0] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[e2:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vector<4xf32>> +// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e2]], %[[i1]][1] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[e3:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[i3:.*]] = llvm.insertvalue %[[e3]], %[[i2]][2] : !llvm.array<3 x vector<4xf32>> +// CHECK: llvm.return %[[i3]] : !llvm.array<3 x vector<4xf32>> func @extract_element(%arg0: vector<16xf32>) -> f32 { %0 = constant 15 : i32 @@ -311,9 +311,9 @@ func @extract_element(%arg0: vector<16xf32>) -> f32 { return %1 : f32 } // CHECK-LABEL: llvm.func @extract_element( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f32>) +// CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[c:.*]] = llvm.mlir.constant(15 : i32) : i32 -// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : i32] : !llvm.vec<16 x f32> +// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : i32] : vector<16xf32> // CHECK: llvm.return %[[x]] : f32 func @extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 { @@ -322,7 +322,7 @@ func @extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 { } // CHECK-LABEL: llvm.func @extract_element_from_vec_1d // CHECK: llvm.mlir.constant(15 : i64) : i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : !llvm.vec<16 x f32> +// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32> // CHECK: llvm.return {{.*}} : f32 func @extract_vec_2d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> { @@ -330,25 +330,25 @@ func @extract_vec_2d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> return %0 : vector<3x16xf32> } // 
CHECK-LABEL: llvm.func @extract_vec_2d_from_vec_3d -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vec<16 x f32>>> -// CHECK: llvm.return {{.*}} : !llvm.array<3 x vec<16 x f32>> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<16xf32>>> +// CHECK: llvm.return {{.*}} : !llvm.array<3 x vector<16xf32>> func @extract_vec_1d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<16xf32> { %0 = vector.extract %arg0[0, 0]: vector<4x3x16xf32> return %0 : vector<16xf32> } // CHECK-LABEL: llvm.func @extract_vec_1d_from_vec_3d -// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vec<16 x f32>>> -// CHECK: llvm.return {{.*}} : !llvm.vec<16 x f32> +// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>> +// CHECK: llvm.return {{.*}} : vector<16xf32> func @extract_element_from_vec_3d(%arg0: vector<4x3x16xf32>) -> f32 { %0 = vector.extract %arg0[0, 0, 0]: vector<4x3x16xf32> return %0 : f32 } // CHECK-LABEL: llvm.func @extract_element_from_vec_3d -// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vec<16 x f32>>> +// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>> // CHECK: llvm.mlir.constant(0 : i64) : i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : !llvm.vec<16 x f32> +// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32> // CHECK: llvm.return {{.*}} : f32 func @insert_element(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { @@ -358,10 +358,10 @@ func @insert_element(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { } // CHECK-LABEL: llvm.func @insert_element( // CHECK-SAME: %[[A:.*]]: f32, -// CHECK-SAME: %[[B:.*]]: !llvm.vec<4 x f32>) +// CHECK-SAME: %[[B:.*]]: vector<4xf32>) // CHECK: %[[c:.*]] = llvm.mlir.constant(3 : i32) : i32 -// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[c]] : i32] : !llvm.vec<4 x f32> -// CHECK: llvm.return %[[x]] : !llvm.vec<4 x f32> +// CHECK: %[[x:.*]] = 
llvm.insertelement %[[A]], %[[B]][%[[c]] : i32] : vector<4xf32> +// CHECK: llvm.return %[[x]] : vector<4xf32> func @insert_element_into_vec_1d(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { %0 = vector.insert %arg0, %arg1[3] : f32 into vector<4xf32> @@ -369,65 +369,65 @@ func @insert_element_into_vec_1d(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf } // CHECK-LABEL: llvm.func @insert_element_into_vec_1d // CHECK: llvm.mlir.constant(3 : i64) : i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : !llvm.vec<4 x f32> -// CHECK: llvm.return {{.*}} : !llvm.vec<4 x f32> +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> +// CHECK: llvm.return {{.*}} : vector<4xf32> func @insert_vec_2d_into_vec_3d(%arg0: vector<8x16xf32>, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> { %0 = vector.insert %arg0, %arg1[3] : vector<8x16xf32> into vector<4x8x16xf32> return %0 : vector<4x8x16xf32> } // CHECK-LABEL: llvm.func @insert_vec_2d_into_vec_3d -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vec<16 x f32>>> -// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vec<16 x f32>>> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vector<16xf32>>> +// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>> func @insert_vec_1d_into_vec_3d(%arg0: vector<16xf32>, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> { %0 = vector.insert %arg0, %arg1[3, 7] : vector<16xf32> into vector<4x8x16xf32> return %0 : vector<4x8x16xf32> } // CHECK-LABEL: llvm.func @insert_vec_1d_into_vec_3d -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vec<16 x f32>>> -// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vec<16 x f32>>> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>> +// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>> func @insert_element_into_vec_3d(%arg0: f32, %arg1: vector<4x8x16xf32>) -> 
vector<4x8x16xf32> { %0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x16xf32> return %0 : vector<4x8x16xf32> } // CHECK-LABEL: llvm.func @insert_element_into_vec_3d -// CHECK: llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vec<16 x f32>>> +// CHECK: llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>> // CHECK: llvm.mlir.constant(15 : i64) : i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : !llvm.vec<16 x f32> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vec<16 x f32>>> -// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vec<16 x f32>>> +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<16xf32> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>> +// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>> func @vector_type_cast(%arg0: memref<8x8x8xf32>) -> memref> { %0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref> return %0 : memref> } // CHECK-LABEL: llvm.func @vector_type_cast -// CHECK: llvm.mlir.undef : !llvm.struct<(ptr>>>, ptr>>>, i64)> +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr>>>, ptr>>>, i64)> // CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr>>> -// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm.struct<(ptr>>>, ptr>>>, i64)> +// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr>>> +// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm.struct<(ptr>>>, ptr>>>, i64)> // CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr to !llvm.ptr>>> -// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm.struct<(ptr>>>, ptr>>>, i64)> 
+// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr to !llvm.ptr>>> +// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm.struct<(ptr>>>, ptr>>>, i64)> // CHECK: llvm.mlir.constant(0 : index -// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr>>>, ptr>>>, i64)> +// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr>>>, ptr>>>, i64)> func @vector_type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> memref, 3> { %0 = vector.type_cast %arg0: memref<8x8x8xf32, 3> to memref, 3> return %0 : memref, 3> } // CHECK-LABEL: llvm.func @vector_type_cast_non_zero_addrspace -// CHECK: llvm.mlir.undef : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> // CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr>>, 3> -// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> +// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr>>, 3> +// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> // CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr to !llvm.ptr>>, 3> -// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> +// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr to !llvm.ptr>>, 3> +// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> // CHECK: llvm.mlir.constant(0 : index -// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> +// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> func @vector_print_scalar_i1(%arg0: i1) 
{ vector.print %arg0 : i1 @@ -571,27 +571,27 @@ func @vector_print_vector(%arg0: vector<2x2xf32>) { return } // CHECK-LABEL: llvm.func @vector_print_vector( -// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vec<2 x f32>>) +// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vector<2xf32>>) // CHECK: llvm.call @printOpen() : () -> () -// CHECK: %[[x0:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vec<2 x f32>> +// CHECK: %[[x0:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vector<2xf32>> // CHECK: llvm.call @printOpen() : () -> () // CHECK: %[[x1:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[x2:.*]] = llvm.extractelement %[[x0]][%[[x1]] : i64] : !llvm.vec<2 x f32> +// CHECK: %[[x2:.*]] = llvm.extractelement %[[x0]][%[[x1]] : i64] : vector<2xf32> // CHECK: llvm.call @printF32(%[[x2]]) : (f32) -> () // CHECK: llvm.call @printComma() : () -> () // CHECK: %[[x3:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[x4:.*]] = llvm.extractelement %[[x0]][%[[x3]] : i64] : !llvm.vec<2 x f32> +// CHECK: %[[x4:.*]] = llvm.extractelement %[[x0]][%[[x3]] : i64] : vector<2xf32> // CHECK: llvm.call @printF32(%[[x4]]) : (f32) -> () // CHECK: llvm.call @printClose() : () -> () // CHECK: llvm.call @printComma() : () -> () -// CHECK: %[[x5:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vec<2 x f32>> +// CHECK: %[[x5:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vector<2xf32>> // CHECK: llvm.call @printOpen() : () -> () // CHECK: %[[x6:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[x7:.*]] = llvm.extractelement %[[x5]][%[[x6]] : i64] : !llvm.vec<2 x f32> +// CHECK: %[[x7:.*]] = llvm.extractelement %[[x5]][%[[x6]] : i64] : vector<2xf32> // CHECK: llvm.call @printF32(%[[x7]]) : (f32) -> () // CHECK: llvm.call @printComma() : () -> () // CHECK: %[[x8:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[x9:.*]] = llvm.extractelement %[[x5]][%[[x8]] : i64] : !llvm.vec<2 x f32> +// CHECK: %[[x9:.*]] = llvm.extractelement %[[x5]][%[[x8]] : i64] : 
vector<2xf32> // CHECK: llvm.call @printF32(%[[x9]]) : (f32) -> () // CHECK: llvm.call @printClose() : () -> () // CHECK: llvm.call @printClose() : () -> () @@ -602,45 +602,45 @@ func @extract_strided_slice1(%arg0: vector<4xf32>) -> vector<2xf32> { return %0 : vector<2xf32> } // CHECK-LABEL: llvm.func @extract_strided_slice1( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<4 x f32>) -// CHECK: %[[T0:.*]] = llvm.shufflevector %[[A]], %[[A]] [2, 3] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32> -// CHECK: llvm.return %[[T0]] : !llvm.vec<2 x f32> +// CHECK-SAME: %[[A:.*]]: vector<4xf32>) +// CHECK: %[[T0:.*]] = llvm.shufflevector %[[A]], %[[A]] [2, 3] : vector<4xf32>, vector<4xf32> +// CHECK: llvm.return %[[T0]] : vector<2xf32> func @extract_strided_slice2(%arg0: vector<4x8xf32>) -> vector<2x8xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32> return %0 : vector<2x8xf32> } // CHECK-LABEL: llvm.func @extract_strided_slice2( -// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vec<8 x f32>>) -// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vec<8 x f32>> -// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vec<8 x f32>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vec<8 x f32>> -// CHECK: %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vec<8 x f32>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : !llvm.array<2 x vec<8 x f32>> -// CHECK: llvm.return %[[T4]] : !llvm.array<2 x vec<8 x f32>> +// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<8xf32>>) +// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<8xf32>> +// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vector<8xf32>> +// CHECK: %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : 
!llvm.array<2 x vector<8xf32>> +// CHECK: llvm.return %[[T4]] : !llvm.array<2 x vector<8xf32>> func @extract_strided_slice3(%arg0: vector<4x8xf32>) -> vector<2x2xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32> return %0 : vector<2x2xf32> } // CHECK-LABEL: llvm.func @extract_strided_slice3( -// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vec<8 x f32>>) -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm.array<2 x vec<2 x f32>> -// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vec<8 x f32>> -// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2, 3] : !llvm.vec<8 x f32>, !llvm.vec<8 x f32> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<2 x vec<2 x f32>> -// CHECK: %[[T5:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vec<8 x f32>> -// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T5]] [2, 3] : !llvm.vec<8 x f32>, !llvm.vec<8 x f32> -// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T4]][1] : !llvm.array<2 x vec<2 x f32>> -// CHECK: llvm.return %[[T7]] : !llvm.array<2 x vec<2 x f32>> +// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<8xf32>>) +// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>> +// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2, 3] : vector<8xf32>, vector<8xf32> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[T5:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>> +// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T5]] [2, 3] : vector<8xf32>, vector<8xf32> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T4]][1] : !llvm.array<2 x vector<2xf32>> +// CHECK: llvm.return %[[T7]] : !llvm.array<2 x vector<2xf32>> func 
@insert_strided_slice1(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vector<4x4x4xf32> { %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32> return %0 : vector<4x4x4xf32> } // CHECK-LABEL: llvm.func @insert_strided_slice1 -// CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vec<4 x f32>>> -// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vec<4 x f32>>> +// CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>> func @insert_strided_slice2(%a: vector<2x2xf32>, %b: vector<4x4xf32>) -> vector<4x4xf32> { %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32> @@ -649,34 +649,34 @@ func @insert_strided_slice2(%a: vector<2x2xf32>, %b: vector<4x4xf32>) -> vector< // CHECK-LABEL: llvm.func @insert_strided_slice2 // // Subvector vector<2xf32> @0 into vector<4xf32> @2 -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vec<2 x f32>> -// CHECK-NEXT: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vec<4 x f32>> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<2xf32>> +// CHECK-NEXT: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vector<4xf32>> // Element @0 -> element @2 // CHECK-NEXT: llvm.mlir.constant(0 : index) : i64 -// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : !llvm.vec<2 x f32> +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32> // CHECK-NEXT: llvm.mlir.constant(2 : index) : i64 -// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : !llvm.vec<4 x f32> +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> // Element @1 -> element @3 // CHECK-NEXT: llvm.mlir.constant(1 : index) : i64 -// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : !llvm.vec<2 x f32> +// CHECK-NEXT: llvm.extractelement 
{{.*}}[{{.*}} : i64] : vector<2xf32> // CHECK-NEXT: llvm.mlir.constant(3 : index) : i64 -// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : !llvm.vec<4 x f32> -// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x vec<4 x f32>> +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x vector<4xf32>> // // Subvector vector<2xf32> @1 into vector<4xf32> @3 -// CHECK: llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vec<2 x f32>> -// CHECK-NEXT: llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vec<4 x f32>> +// CHECK: llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<2xf32>> +// CHECK-NEXT: llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vector<4xf32>> // Element @0 -> element @2 // CHECK-NEXT: llvm.mlir.constant(0 : index) : i64 -// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : !llvm.vec<2 x f32> +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32> // CHECK-NEXT: llvm.mlir.constant(2 : index) : i64 -// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : !llvm.vec<4 x f32> +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> // Element @1 -> element @3 // CHECK-NEXT: llvm.mlir.constant(1 : index) : i64 -// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : !llvm.vec<2 x f32> +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32> // CHECK-NEXT: llvm.mlir.constant(3 : index) : i64 -// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : !llvm.vec<4 x f32> -// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x vec<4 x f32>> +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x vector<4xf32>> func @insert_strided_slice3(%arg0: vector<2x4xf32>, %arg1: vector<16x4x8xf32>) -> vector<16x4x8xf32> { %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [0, 0, 
2], strides = [1, 1]}: @@ -684,49 +684,49 @@ func @insert_strided_slice3(%arg0: vector<2x4xf32>, %arg1: vector<16x4x8xf32>) - return %0 : vector<16x4x8xf32> } // CHECK-LABEL: llvm.func @insert_strided_slice3( -// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vec<4 x f32>>, -// CHECK-SAME: %[[B:.*]]: !llvm.array<16 x array<4 x vec<8 x f32>>>) -// CHECK: %[[s0:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<16 x array<4 x vec<8 x f32>>> -// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vec<4 x f32>> -// CHECK: %[[s2:.*]] = llvm.extractvalue %[[B]][0, 0] : !llvm.array<16 x array<4 x vec<8 x f32>>> +// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vector<4xf32>>, +// CHECK-SAME: %[[B:.*]]: !llvm.array<16 x array<4 x vector<8xf32>>>) +// CHECK: %[[s0:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[s2:.*]] = llvm.extractvalue %[[B]][0, 0] : !llvm.array<16 x array<4 x vector<8xf32>>> // CHECK: %[[s3:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[s4:.*]] = llvm.extractelement %[[s1]][%[[s3]] : i64] : !llvm.vec<4 x f32> +// CHECK: %[[s4:.*]] = llvm.extractelement %[[s1]][%[[s3]] : i64] : vector<4xf32> // CHECK: %[[s5:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[s6:.*]] = llvm.insertelement %[[s4]], %[[s2]][%[[s5]] : i64] : !llvm.vec<8 x f32> +// CHECK: %[[s6:.*]] = llvm.insertelement %[[s4]], %[[s2]][%[[s5]] : i64] : vector<8xf32> // CHECK: %[[s7:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[s8:.*]] = llvm.extractelement %[[s1]][%[[s7]] : i64] : !llvm.vec<4 x f32> +// CHECK: %[[s8:.*]] = llvm.extractelement %[[s1]][%[[s7]] : i64] : vector<4xf32> // CHECK: %[[s9:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[s10:.*]] = llvm.insertelement %[[s8]], %[[s6]][%[[s9]] : i64] : !llvm.vec<8 x f32> +// CHECK: %[[s10:.*]] = llvm.insertelement %[[s8]], %[[s6]][%[[s9]] : i64] : vector<8xf32> // CHECK: 
%[[s11:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[s12:.*]] = llvm.extractelement %[[s1]][%[[s11]] : i64] : !llvm.vec<4 x f32> +// CHECK: %[[s12:.*]] = llvm.extractelement %[[s1]][%[[s11]] : i64] : vector<4xf32> // CHECK: %[[s13:.*]] = llvm.mlir.constant(4 : index) : i64 -// CHECK: %[[s14:.*]] = llvm.insertelement %[[s12]], %[[s10]][%[[s13]] : i64] : !llvm.vec<8 x f32> +// CHECK: %[[s14:.*]] = llvm.insertelement %[[s12]], %[[s10]][%[[s13]] : i64] : vector<8xf32> // CHECK: %[[s15:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[s16:.*]] = llvm.extractelement %[[s1]][%[[s15]] : i64] : !llvm.vec<4 x f32> +// CHECK: %[[s16:.*]] = llvm.extractelement %[[s1]][%[[s15]] : i64] : vector<4xf32> // CHECK: %[[s17:.*]] = llvm.mlir.constant(5 : index) : i64 -// CHECK: %[[s18:.*]] = llvm.insertelement %[[s16]], %[[s14]][%[[s17]] : i64] : !llvm.vec<8 x f32> -// CHECK: %[[s19:.*]] = llvm.insertvalue %[[s18]], %[[s0]][0] : !llvm.array<4 x vec<8 x f32>> -// CHECK: %[[s20:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vec<4 x f32>> -// CHECK: %[[s21:.*]] = llvm.extractvalue %[[B]][0, 1] : !llvm.array<16 x array<4 x vec<8 x f32>>> +// CHECK: %[[s18:.*]] = llvm.insertelement %[[s16]], %[[s14]][%[[s17]] : i64] : vector<8xf32> +// CHECK: %[[s19:.*]] = llvm.insertvalue %[[s18]], %[[s0]][0] : !llvm.array<4 x vector<8xf32>> +// CHECK: %[[s20:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[s21:.*]] = llvm.extractvalue %[[B]][0, 1] : !llvm.array<16 x array<4 x vector<8xf32>>> // CHECK: %[[s22:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[s23:.*]] = llvm.extractelement %[[s20]][%[[s22]] : i64] : !llvm.vec<4 x f32> +// CHECK: %[[s23:.*]] = llvm.extractelement %[[s20]][%[[s22]] : i64] : vector<4xf32> // CHECK: %[[s24:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[s25:.*]] = llvm.insertelement %[[s23]], %[[s21]][%[[s24]] : i64] : !llvm.vec<8 x f32> +// CHECK: %[[s25:.*]] = llvm.insertelement %[[s23]], 
%[[s21]][%[[s24]] : i64] : vector<8xf32> // CHECK: %[[s26:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[s27:.*]] = llvm.extractelement %[[s20]][%[[s26]] : i64] : !llvm.vec<4 x f32> +// CHECK: %[[s27:.*]] = llvm.extractelement %[[s20]][%[[s26]] : i64] : vector<4xf32> // CHECK: %[[s28:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[s29:.*]] = llvm.insertelement %[[s27]], %[[s25]][%[[s28]] : i64] : !llvm.vec<8 x f32> +// CHECK: %[[s29:.*]] = llvm.insertelement %[[s27]], %[[s25]][%[[s28]] : i64] : vector<8xf32> // CHECK: %[[s30:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[s31:.*]] = llvm.extractelement %[[s20]][%[[s30]] : i64] : !llvm.vec<4 x f32> +// CHECK: %[[s31:.*]] = llvm.extractelement %[[s20]][%[[s30]] : i64] : vector<4xf32> // CHECK: %[[s32:.*]] = llvm.mlir.constant(4 : index) : i64 -// CHECK: %[[s33:.*]] = llvm.insertelement %[[s31]], %[[s29]][%[[s32]] : i64] : !llvm.vec<8 x f32> +// CHECK: %[[s33:.*]] = llvm.insertelement %[[s31]], %[[s29]][%[[s32]] : i64] : vector<8xf32> // CHECK: %[[s34:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[s35:.*]] = llvm.extractelement %[[s20]][%[[s34]] : i64] : !llvm.vec<4 x f32> +// CHECK: %[[s35:.*]] = llvm.extractelement %[[s20]][%[[s34]] : i64] : vector<4xf32> // CHECK: %[[s36:.*]] = llvm.mlir.constant(5 : index) : i64 -// CHECK: %[[s37:.*]] = llvm.insertelement %[[s35]], %[[s33]][%[[s36]] : i64] : !llvm.vec<8 x f32> -// CHECK: %[[s38:.*]] = llvm.insertvalue %[[s37]], %[[s19]][1] : !llvm.array<4 x vec<8 x f32>> -// CHECK: %[[s39:.*]] = llvm.insertvalue %[[s38]], %[[B]][0] : !llvm.array<16 x array<4 x vec<8 x f32>>> -// CHECK: llvm.return %[[s39]] : !llvm.array<16 x array<4 x vec<8 x f32>>> +// CHECK: %[[s37:.*]] = llvm.insertelement %[[s35]], %[[s33]][%[[s36]] : i64] : vector<8xf32> +// CHECK: %[[s38:.*]] = llvm.insertvalue %[[s37]], %[[s19]][1] : !llvm.array<4 x vector<8xf32>> +// CHECK: %[[s39:.*]] = llvm.insertvalue %[[s38]], %[[B]][0] : !llvm.array<16 x array<4 x vector<8xf32>>> 
+// CHECK: llvm.return %[[s39]] : !llvm.array<16 x array<4 x vector<8xf32>>> func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> { %0 = vector.extract_slices %arg0, [2, 2], [1, 1] @@ -735,33 +735,33 @@ func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> { return %1 : vector<1x1xf32> } // CHECK-LABEL: llvm.func @extract_strides( -// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vec<3 x f32>>) -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vec<1 x f32>> -// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<3 x vec<3 x f32>> -// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2] : !llvm.vec<3 x f32>, !llvm.vec<3 x f32> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<1 x vec<1 x f32>> -// CHECK: llvm.return %[[T4]] : !llvm.array<1 x vec<1 x f32>> +// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vector<3xf32>>) +// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vector<1xf32>> +// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<3 x vector<3xf32>> +// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2] : vector<3xf32>, vector<3xf32> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<1 x vector<1xf32>> +// CHECK: llvm.return %[[T4]] : !llvm.array<1 x vector<1xf32>> // CHECK-LABEL: llvm.func @vector_fma( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<8 x f32>, %[[B:.*]]: !llvm.array<2 x vec<4 x f32>>) -// CHECK-SAME: -> !llvm.struct<(vec<8 x f32>, array<2 x vec<4 x f32>>)> { +// CHECK-SAME: %[[A:.*]]: vector<8xf32>, %[[B:.*]]: !llvm.array<2 x vector<4xf32>>) +// CHECK-SAME: -> !llvm.struct<(vector<8xf32>, array<2 x vector<4xf32>>)> { func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>) -> (vector<8xf32>, vector<2x4xf32>) { // CHECK: "llvm.intr.fmuladd"(%[[A]], %[[A]], %[[A]]) : - // CHECK-SAME: (!llvm.vec<8 x f32>, !llvm.vec<8 x f32>, !llvm.vec<8 x f32>) -> !llvm.vec<8 x 
f32> + // CHECK-SAME: (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> %0 = vector.fma %a, %a, %a : vector<8xf32> - // CHECK: %[[b00:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vec<4 x f32>> - // CHECK: %[[b01:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vec<4 x f32>> - // CHECK: %[[b02:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vec<4 x f32>> + // CHECK: %[[b00:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b01:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b02:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[B0:.*]] = "llvm.intr.fmuladd"(%[[b00]], %[[b01]], %[[b02]]) : - // CHECK-SAME: (!llvm.vec<4 x f32>, !llvm.vec<4 x f32>, !llvm.vec<4 x f32>) -> !llvm.vec<4 x f32> - // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vec<4 x f32>> - // CHECK: %[[b10:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vec<4 x f32>> - // CHECK: %[[b11:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vec<4 x f32>> - // CHECK: %[[b12:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vec<4 x f32>> + // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> + // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b10:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b11:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b12:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[B1:.*]] = "llvm.intr.fmuladd"(%[[b10]], %[[b11]], %[[b12]]) : - // CHECK-SAME: (!llvm.vec<4 x f32>, !llvm.vec<4 x f32>, !llvm.vec<4 x f32>) -> !llvm.vec<4 x f32> - // CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm.array<2 x vec<4 x f32>> + // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> + // CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>> 
%1 = vector.fma %b, %b, %b : vector<2x4xf32> return %0, %1: vector<8xf32>, vector<2x4xf32> @@ -772,10 +772,10 @@ func @reduce_f16(%arg0: vector<16xf16>) -> f16 { return %0 : f16 } // CHECK-LABEL: llvm.func @reduce_f16( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f16>) +// CHECK-SAME: %[[A:.*]]: vector<16xf16>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f16) : f16 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) -// CHECK-SAME: {reassoc = false} : (f16, !llvm.vec<16 x f16>) -> f16 +// CHECK-SAME: {reassoc = false} : (f16, vector<16xf16>) -> f16 // CHECK: llvm.return %[[V]] : f16 func @reduce_f32(%arg0: vector<16xf32>) -> f32 { @@ -783,10 +783,10 @@ func @reduce_f32(%arg0: vector<16xf32>) -> f32 { return %0 : f32 } // CHECK-LABEL: llvm.func @reduce_f32( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f32>) +// CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) -// CHECK-SAME: {reassoc = false} : (f32, !llvm.vec<16 x f32>) -> f32 +// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32 // CHECK: llvm.return %[[V]] : f32 func @reduce_f64(%arg0: vector<16xf64>) -> f64 { @@ -794,10 +794,10 @@ func @reduce_f64(%arg0: vector<16xf64>) -> f64 { return %0 : f64 } // CHECK-LABEL: llvm.func @reduce_f64( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f64>) +// CHECK-SAME: %[[A:.*]]: vector<16xf64>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) -// CHECK-SAME: {reassoc = false} : (f64, !llvm.vec<16 x f64>) -> f64 +// CHECK-SAME: {reassoc = false} : (f64, vector<16xf64>) -> f64 // CHECK: llvm.return %[[V]] : f64 func @reduce_i8(%arg0: vector<16xi8>) -> i8 { @@ -805,7 +805,7 @@ func @reduce_i8(%arg0: vector<16xi8>) -> i8 { return %0 : i8 } // CHECK-LABEL: llvm.func @reduce_i8( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x i8>) +// CHECK-SAME: 
%[[A:.*]]: vector<16xi8>) // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) // CHECK: llvm.return %[[V]] : i8 @@ -814,7 +814,7 @@ func @reduce_i32(%arg0: vector<16xi32>) -> i32 { return %0 : i32 } // CHECK-LABEL: llvm.func @reduce_i32( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x i32>) +// CHECK-SAME: %[[A:.*]]: vector<16xi32>) // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) // CHECK: llvm.return %[[V]] : i32 @@ -823,7 +823,7 @@ func @reduce_i64(%arg0: vector<16xi64>) -> i64 { return %0 : i64 } // CHECK-LABEL: llvm.func @reduce_i64( -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x i64>) +// CHECK-SAME: %[[A:.*]]: vector<16xi64>) // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) // CHECK: llvm.return %[[V]] : i64 @@ -838,7 +838,7 @@ func @matrix_ops(%A: vector<64xf64>, %B: vector<48xf64>) -> vector<12xf64> { // CHECK-LABEL: llvm.func @matrix_ops // CHECK: llvm.intr.matrix.multiply %{{.*}}, %{{.*}} { // CHECK-SAME: lhs_columns = 16 : i32, lhs_rows = 4 : i32, rhs_columns = 3 : i32 -// CHECK-SAME: } : (!llvm.vec<64 x f64>, !llvm.vec<48 x f64>) -> !llvm.vec<12 x f64> +// CHECK-SAME: } : (vector<64xf64>, vector<48xf64>) -> vector<12xf64> func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { %f7 = constant 7.0: f32 @@ -851,74 +851,74 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> !llvm.vec<17 x f32> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32> // // 1. Bitcast to vector form. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : // CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : -// CHECK-SAME: !llvm.ptr to !llvm.ptr> +// CHECK-SAME: !llvm.ptr to !llvm.ptr> // CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : // CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // // 2. 
Create a vector with linear indices [ 0 .. vector_length - 1 ]. // CHECK: %[[linearIndex:.*]] = llvm.mlir.constant(dense // CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi32>) : !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32>) : vector<17xi32> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: %[[otrunc:.*]] = llvm.trunc %[[BASE]] : i64 to i32 -// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i32> +// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : vector<17xi32> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[otrunc]], %[[offsetVec]][%[[c0]] : -// CHECK-SAME: i32] : !llvm.vec<17 x i32> +// CHECK-SAME: i32] : vector<17xi32> // CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32>, vector<17xi32> // CHECK: %[[offsetVec4:.*]] = llvm.add %[[offsetVec3]], %[[linearIndex]] : -// CHECK-SAME: !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32> // // 4. Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. 
dim ] // CHECK: %[[dtrunc:.*]] = llvm.trunc %[[DIM]] : i64 to i32 -// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i32> +// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : vector<17xi32> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[dtrunc]], %[[dimVec]][%[[c01]] : -// CHECK-SAME: i32] : !llvm.vec<17 x i32> +// CHECK-SAME: i32] : vector<17xi32> // CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32>, vector<17xi32> // CHECK: %[[mask:.*]] = llvm.icmp "slt" %[[offsetVec4]], %[[dimVec3]] : -// CHECK-SAME: !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32> // // 5. Rewrite as a masked read. // CHECK: %[[PASS_THROUGH:.*]] = llvm.mlir.constant(dense<7.000000e+00> : -// CHECK-SAME: vector<17xf32>) : !llvm.vec<17 x f32> +// CHECK-SAME: vector<17xf32>) : vector<17xf32> // CHECK: %[[loaded:.*]] = llvm.intr.masked.load %[[vecPtr]], %[[mask]], // CHECK-SAME: %[[PASS_THROUGH]] {alignment = 4 : i32} : -// CHECK-SAME: (!llvm.ptr>, !llvm.vec<17 x i1>, !llvm.vec<17 x f32>) -> !llvm.vec<17 x f32> +// CHECK-SAME: (!llvm.ptr>, vector<17xi1>, vector<17xf32>) -> vector<17xf32> // // 1. Bitcast to vector form. // CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} : // CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[vecPtr_b:.*]] = llvm.bitcast %[[gep_b]] : -// CHECK-SAME: !llvm.ptr to !llvm.ptr> +// CHECK-SAME: !llvm.ptr to !llvm.ptr> // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. 
// CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant(dense // CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi32>) : !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32>) : vector<17xi32> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32>, vector<17xi32> // CHECK: llvm.add // // 4. Let dim the memref dimension, compute the vector comparison mask: @@ -926,13 +926,13 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> -// CHECK: %[[mask_b:.*]] = llvm.icmp "slt" {{.*}} : !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32>, vector<17xi32> +// CHECK: %[[mask_b:.*]] = llvm.icmp "slt" {{.*}} : vector<17xi32> // // 5. Rewrite as a masked write. 
// CHECK: llvm.intr.masked.store %[[loaded]], %[[vecPtr_b]], %[[mask_b]] // CHECK-SAME: {alignment = 4 : i32} : -// CHECK-SAME: !llvm.vec<17 x f32>, !llvm.vec<17 x i1> into !llvm.ptr> +// CHECK-SAME: vector<17xf32>, vector<17xi1> into !llvm.ptr> func @transfer_read_2d_to_1d(%A : memref, %base0: index, %base1: index) -> vector<17xf32> { %f7 = constant 7.0: f32 @@ -942,34 +942,34 @@ func @transfer_read_2d_to_1d(%A : memref, %base0: index, %base1: index) return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_2d_to_1d -// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: i64, %[[BASE_1:[a-zA-Z0-9]*]]: i64) -> !llvm.vec<17 x f32> +// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: i64, %[[BASE_1:[a-zA-Z0-9]*]]: i64) -> vector<17xf32> // CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] : // CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // // Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: %[[trunc:.*]] = llvm.trunc %[[BASE_1]] : i64 to i32 -// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i32> +// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : vector<17xi32> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[trunc]], %[[offsetVec]][%[[c0]] : -// CHECK-SAME: i32] : !llvm.vec<17 x i32> +// CHECK-SAME: i32] : vector<17xi32> // CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32>, vector<17xi32> // // Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. 
dim ] // CHECK: %[[dimtrunc:.*]] = llvm.trunc %[[DIM]] : i64 to i32 -// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i32> +// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : vector<17xi32> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[dimtrunc]], %[[dimVec]][%[[c01]] : -// CHECK-SAME: i32] : !llvm.vec<17 x i32> +// CHECK-SAME: i32] : vector<17xi32> // CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm.vec<17 x i32>, !llvm.vec<17 x i32> +// CHECK-SAME: vector<17xi32>, vector<17xi32> func @transfer_read_1d_non_zero_addrspace(%A : memref, %base: index) -> vector<17xf32> { %f7 = constant 7.0: f32 @@ -982,13 +982,13 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref, %base: index) - return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d_non_zero_addrspace -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> !llvm.vec<17 x f32> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32> // // 1. Check address space for GEP is correct. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : // CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[vecPtr:.*]] = llvm.addrspacecast %[[gep]] : -// CHECK-SAME: !llvm.ptr to !llvm.ptr> +// CHECK-SAME: !llvm.ptr to !llvm.ptr> // // 2. Check address space of the memref is correct. 
// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : @@ -998,7 +998,7 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref, %base: index) - // CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} : // CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[vecPtr_b:.*]] = llvm.addrspacecast %[[gep_b]] : -// CHECK-SAME: !llvm.ptr to !llvm.ptr> +// CHECK-SAME: !llvm.ptr to !llvm.ptr> func @transfer_read_1d_not_masked(%A : memref, %base: index) -> vector<17xf32> { %f7 = constant 7.0: f32 @@ -1007,16 +1007,16 @@ func @transfer_read_1d_not_masked(%A : memref, %base: index) -> vector<17 return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d_not_masked -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> !llvm.vec<17 x f32> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32> // // 1. Bitcast to vector form. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : // CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : -// CHECK-SAME: !llvm.ptr to !llvm.ptr> +// CHECK-SAME: !llvm.ptr to !llvm.ptr> // // 2. Rewrite as a load. -// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] {alignment = 4 : i64} : !llvm.ptr> +// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] {alignment = 4 : i64} : !llvm.ptr> func @transfer_read_1d_cast(%A : memref, %base: index) -> vector<12xi8> { %c0 = constant 0: i32 @@ -1025,24 +1025,24 @@ func @transfer_read_1d_cast(%A : memref, %base: index) -> vector<12xi8> { return %v: vector<12xi8> } // CHECK-LABEL: func @transfer_read_1d_cast -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> !llvm.vec<12 x i8> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<12xi8> // // 1. Bitcast to vector form. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : // CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : -// CHECK-SAME: !llvm.ptr to !llvm.ptr> +// CHECK-SAME: !llvm.ptr to !llvm.ptr> // // 2. Rewrite as a load. 
-// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] {alignment = 4 : i64} : !llvm.ptr> +// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] {alignment = 4 : i64} : !llvm.ptr> func @genbool_1d() -> vector<8xi1> { %0 = vector.constant_mask [4] : vector<8xi1> return %0 : vector<8xi1> } // CHECK-LABEL: func @genbool_1d -// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>) : !llvm.vec<8 x i1> -// CHECK: llvm.return %[[C1]] : !llvm.vec<8 x i1> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>) : vector<8xi1> +// CHECK: llvm.return %[[C1]] : vector<8xi1> func @genbool_2d() -> vector<4x4xi1> { %v = vector.constant_mask [2, 2] : vector<4x4xi1> @@ -1050,11 +1050,11 @@ func @genbool_2d() -> vector<4x4xi1> { } // CHECK-LABEL: func @genbool_2d -// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, false, false]> : vector<4xi1>) : !llvm.vec<4 x i1> -// CHECK: %[[C2:.*]] = llvm.mlir.constant(dense : vector<4x4xi1>) : !llvm.array<4 x vec<4 x i1>> -// CHECK: %[[T0:.*]] = llvm.insertvalue %[[C1]], %[[C2]][0] : !llvm.array<4 x vec<4 x i1>> -// CHECK: %[[T1:.*]] = llvm.insertvalue %[[C1]], %[[T0]][1] : !llvm.array<4 x vec<4 x i1>> -// CHECK: llvm.return %[[T1]] : !llvm.array<4 x vec<4 x i1>> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, false, false]> : vector<4xi1>) : vector<4xi1> +// CHECK: %[[C2:.*]] = llvm.mlir.constant(dense : vector<4x4xi1>) : !llvm.array<4 x vector<4xi1>> +// CHECK: %[[T0:.*]] = llvm.insertvalue %[[C1]], %[[C2]][0] : !llvm.array<4 x vector<4xi1>> +// CHECK: %[[T1:.*]] = llvm.insertvalue %[[C1]], %[[T0]][1] : !llvm.array<4 x vector<4xi1>> +// CHECK: llvm.return %[[T1]] : !llvm.array<4 x vector<4xi1>> func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> { %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 } @@ -1063,11 +1063,11 @@ func @flat_transpose(%arg0: vector<16xf32>) -> 
vector<16xf32> { } // CHECK-LABEL: func @flat_transpose -// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f32> +// CHECK-SAME: %[[A:.*]]: vector<16xf32> // CHECK: %[[T:.*]] = llvm.intr.matrix.transpose %[[A]] // CHECK-SAME: {columns = 4 : i32, rows = 4 : i32} : -// CHECK-SAME: !llvm.vec<16 x f32> into !llvm.vec<16 x f32> -// CHECK: llvm.return %[[T]] : !llvm.vec<16 x f32> +// CHECK-SAME: vector<16xf32> into vector<16xf32> +// CHECK: llvm.return %[[T]] : vector<16xf32> func @masked_load_op(%arg0: memref, %arg1: vector<16xi1>, %arg2: vector<16xf32>) -> vector<16xf32> { %c0 = constant 0: index @@ -1078,9 +1078,9 @@ func @masked_load_op(%arg0: memref, %arg1: vector<16xi1>, %arg2: vector<1 // CHECK-LABEL: func @masked_load_op // CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK: %[[B:.*]] = llvm.bitcast %[[P]] : !llvm.ptr to !llvm.ptr> -// CHECK: %[[L:.*]] = llvm.intr.masked.load %[[B]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.ptr>, !llvm.vec<16 x i1>, !llvm.vec<16 x f32>) -> !llvm.vec<16 x f32> -// CHECK: llvm.return %[[L]] : !llvm.vec<16 x f32> +// CHECK: %[[B:.*]] = llvm.bitcast %[[P]] : !llvm.ptr to !llvm.ptr> +// CHECK: %[[L:.*]] = llvm.intr.masked.load %[[B]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.ptr>, vector<16xi1>, vector<16xf32>) -> vector<16xf32> +// CHECK: llvm.return %[[L]] : vector<16xf32> func @masked_store_op(%arg0: memref, %arg1: vector<16xi1>, %arg2: vector<16xf32>) { %c0 = constant 0: index @@ -1091,8 +1091,8 @@ func @masked_store_op(%arg0: memref, %arg1: vector<16xi1>, %arg2: vector< // CHECK-LABEL: func @masked_store_op // CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK: %[[B:.*]] = llvm.bitcast %[[P]] : !llvm.ptr to !llvm.ptr> -// CHECK: llvm.intr.masked.store %{{.*}}, %[[B]], %{{.*}} {alignment = 4 : i32} : !llvm.vec<16 x f32>, 
!llvm.vec<16 x i1> into !llvm.ptr> +// CHECK: %[[B:.*]] = llvm.bitcast %[[P]] : !llvm.ptr to !llvm.ptr> +// CHECK: llvm.intr.masked.store %{{.*}}, %[[B]], %{{.*}} {alignment = 4 : i32} : vector<16xf32>, vector<16xi1> into !llvm.ptr> // CHECK: llvm.return func @gather_op(%arg0: memref, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) -> vector<3xf32> { @@ -1101,9 +1101,9 @@ func @gather_op(%arg0: memref, %arg1: vector<3xi32>, %arg2: vector<3xi1>, } // CHECK-LABEL: func @gather_op -// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr, !llvm.vec<3 x i32>) -> !llvm.vec<3 x ptr> -// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, !llvm.vec<3 x i1>, !llvm.vec<3 x f32>) -> !llvm.vec<3 x f32> -// CHECK: llvm.return %[[G]] : !llvm.vec<3 x f32> +// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr> +// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32> +// CHECK: llvm.return %[[G]] : vector<3xf32> func @scatter_op(%arg0: memref, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) { vector.scatter %arg0[%arg1], %arg2, %arg3 : memref, vector<3xi32>, vector<3xi1>, vector<3xf32> @@ -1111,8 +1111,8 @@ func @scatter_op(%arg0: memref, %arg1: vector<3xi32>, %arg2: vector<3xi1> } // CHECK-LABEL: func @scatter_op -// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr, !llvm.vec<3 x i32>) -> !llvm.vec<3 x ptr> -// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : !llvm.vec<3 x f32>, !llvm.vec<3 x i1> into !llvm.vec<3 x ptr> +// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr> +// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<3xf32>, vector<3xi1> into !llvm.vec<3 x ptr> // 
CHECK: llvm.return func @expand_load_op(%arg0: memref, %arg1: vector<11xi1>, %arg2: vector<11xf32>) -> vector<11xf32> { @@ -1124,8 +1124,8 @@ func @expand_load_op(%arg0: memref, %arg1: vector<11xi1>, %arg2: vector<1 // CHECK-LABEL: func @expand_load_op // CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK: %[[E:.*]] = "llvm.intr.masked.expandload"(%[[P]], %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.vec<11 x i1>, !llvm.vec<11 x f32>) -> !llvm.vec<11 x f32> -// CHECK: llvm.return %[[E]] : !llvm.vec<11 x f32> +// CHECK: %[[E:.*]] = "llvm.intr.masked.expandload"(%[[P]], %{{.*}}, %{{.*}}) : (!llvm.ptr, vector<11xi1>, vector<11xf32>) -> vector<11xf32> +// CHECK: llvm.return %[[E]] : vector<11xf32> func @compress_store_op(%arg0: memref, %arg1: vector<11xi1>, %arg2: vector<11xf32>) { %c0 = constant 0: index @@ -1136,5 +1136,5 @@ func @compress_store_op(%arg0: memref, %arg1: vector<11xi1>, %arg2: vecto // CHECK-LABEL: func @compress_store_op // CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %[[P]], %{{.*}}) : (!llvm.vec<11 x f32>, !llvm.ptr, !llvm.vec<11 x i1>) -> () +// CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %[[P]], %{{.*}}) : (vector<11xf32>, !llvm.ptr, vector<11xi1>) -> () // CHECK: llvm.return diff --git a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir index f8483dcc7f80b5..a6d994ad7db62c 100644 --- a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir +++ b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir @@ -9,7 +9,7 @@ func @transfer_readx2(%A : memref, %base: index) -> vector<2xf32> { return %f: vector<2xf32> } // CHECK-LABEL: @transfer_readx2 -// CHECK: rocdl.buffer.load {{.*}} !llvm.vec<2 x f32> +// CHECK: rocdl.buffer.load {{.*}} 
vector<2xf32> func @transfer_readx4(%A : memref, %base: index) -> vector<4xf32> { %f0 = constant 0.0: f32 @@ -19,7 +19,7 @@ func @transfer_readx4(%A : memref, %base: index) -> vector<4xf32> { return %f: vector<4xf32> } // CHECK-LABEL: @transfer_readx4 -// CHECK: rocdl.buffer.load {{.*}} !llvm.vec<4 x f32> +// CHECK: rocdl.buffer.load {{.*}} vector<4xf32> func @transfer_read_dwordConfig(%A : memref, %base: index) -> vector<4xf32> { %f0 = constant 0.0: f32 @@ -43,7 +43,7 @@ func @transfer_writex2(%A : memref, %B : vector<2xf32>, %base: index) { return } // CHECK-LABEL: @transfer_writex2 -// CHECK: rocdl.buffer.store {{.*}} !llvm.vec<2 x f32> +// CHECK: rocdl.buffer.store {{.*}} vector<2xf32> func @transfer_writex4(%A : memref, %B : vector<4xf32>, %base: index) { vector.transfer_write %B, %A[%base] @@ -52,7 +52,7 @@ func @transfer_writex4(%A : memref, %B : vector<4xf32>, %base: index) { return } // CHECK-LABEL: @transfer_writex4 -// CHECK: rocdl.buffer.store {{.*}} !llvm.vec<4 x f32> +// CHECK: rocdl.buffer.store {{.*}} vector<4xf32> func @transfer_write_dwordConfig(%A : memref, %B : vector<2xf32>, %base: index) { vector.transfer_write %B, %A[%base] diff --git a/mlir/test/Dialect/LLVMIR/dialect-cast.mlir b/mlir/test/Dialect/LLVMIR/dialect-cast.mlir index fb05a7060a2a4a..90eaaa24544f6e 100644 --- a/mlir/test/Dialect/LLVMIR/dialect-cast.mlir +++ b/mlir/test/Dialect/LLVMIR/dialect-cast.mlir @@ -9,7 +9,6 @@ func @mlir_dialect_cast(%0: index, %1: i32, %2: bf16, %3: f16, %4: f32, %5: f64, %10: memref<*xf32>) { llvm.mlir.cast %0 : index to i64 llvm.mlir.cast %0 : index to i32 - llvm.mlir.cast %6 : vector<42xf32> to !llvm.vec<42xf32> llvm.mlir.cast %7 : memref<42xf32> to !llvm.ptr llvm.mlir.cast %7 : memref<42xf32> to !llvm.struct<(ptr, ptr, i64, array<1xi64>, array<1xi64>)> llvm.mlir.cast %8 : memref to !llvm.struct<(ptr, ptr, i64, array<1xi64>, array<1xi64>)> @@ -72,27 +71,13 @@ func @mlir_dialect_cast_integer_non_integer(%0 : i16) { // ----- -func 
@mlir_dialect_cast_nd_vector(%0 : vector<2x2xf32>) { - // expected-error@+1 {{only 1-d vector is allowed}} - llvm.mlir.cast %0 : vector<2x2xf32> to !llvm.vec<4xf32> -} - -// ----- - func @mlir_dialect_cast_scalable_vector(%0 : vector<2xf32>) { - // expected-error@+1 {{only fixed-sized vector is allowed}} + // expected-error@+1 {{vector types should not be casted}} llvm.mlir.cast %0 : vector<2xf32> to !llvm.vec } // ----- -func @mlir_dialect_cast_vector_size_mismatch(%0 : vector<2xf32>) { - // expected-error@+1 {{invalid cast between vectors with mismatching sizes}} - llvm.mlir.cast %0 : vector<2xf32> to !llvm.vec<4xf32> -} - -// ----- - func @mlir_dialect_cast_dynamic_memref_bare_ptr(%0 : memref) { // expected-error@+1 {{unexpected bare pointer for dynamically shaped memref}} llvm.mlir.cast %0 : memref to !llvm.ptr diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index b496237f140be6..bb3a81fa6576b3 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -317,21 +317,21 @@ func @extractvalue_wrong_nesting() { // ----- -func @invalid_vector_type_1(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) { - // expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}} +func @invalid_vector_type_1(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) { + // expected-error@+1 {{expected LLVM dialect-compatible vector type for operand #1}} %0 = llvm.extractelement %arg2[%arg1 : i32] : f32 } // ----- -func @invalid_vector_type_2(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) { - // expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}} +func @invalid_vector_type_2(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) { + // expected-error@+1 {{expected LLVM dialect-compatible vector type for operand #1}} %0 = llvm.insertelement %arg2, %arg2[%arg1 : i32] : f32 } // ----- -func @invalid_vector_type_3(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) { +func 
@invalid_vector_type_3(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) { // expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}} %0 = llvm.shufflevector %arg2, %arg2 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : f32, f32 } @@ -366,74 +366,74 @@ func @nvvm_invalid_shfl_pred_3(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i3 // ----- -func @nvvm_invalid_mma_0(%a0 : f16, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +func @nvvm_invalid_mma_0(%a0 : f16, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // expected-error@+1 {{expected operands to be 4 s followed by either 4 s or 8 floats}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (f16, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (f16, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } // ----- -func @nvvm_invalid_mma_1(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +func @nvvm_invalid_mma_1(%a0 : vector<2xf16>, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // expected-error@+1 {{expected result type to be a struct of either 4 s or 8 floats}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, 
f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)> } // ----- -func @nvvm_invalid_mma_2(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +func @nvvm_invalid_mma_2(%a0 : vector<2xf16>, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // expected-error@+1 {{alayout and blayout attributes must be set to either "row" or "col"}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } // ----- -func @nvvm_invalid_mma_3(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, - %c0 : !llvm.vec<2 x f16>, %c1 : !llvm.vec<2 x f16>, - %c2 : !llvm.vec<2 x f16>, %c3 : !llvm.vec<2 x f16>) { +func @nvvm_invalid_mma_3(%a0 : vector<2xf16>, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, + %c0 : vector<2xf16>, %c1 : vector<2xf16>, + %c2 : vector<2xf16>, %c3 : vector<2xf16>) { // expected-error@+1 {{unimplemented mma.sync variant}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, 
%c0, %c1, %c2, %c3 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } // ----- -func @nvvm_invalid_mma_4(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +func @nvvm_invalid_mma_4(%a0 : vector<2xf16>, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // expected-error@+1 {{unimplemented mma.sync variant}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(vec<2 x f16>, vec<2 x f16>, vec<2 x f16>, vec<2 x f16>)> - llvm.return %0 : !llvm.struct<(vec<2 x f16>, vec<2 x f16>, vec<2 x f16>, vec<2 x f16>)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> } // ----- -func @nvvm_invalid_mma_5(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +func @nvvm_invalid_mma_5(%a0 : vector<2xf16>, %a1 : 
vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // expected-error@+1 {{unimplemented mma.sync variant}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } // ----- -func @nvvm_invalid_mma_6(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +func @nvvm_invalid_mma_6(%a0 : vector<2xf16>, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // expected-error@+1 {{invalid kind of type specified}} @@ -443,12 +443,12 @@ func @nvvm_invalid_mma_6(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, // ----- -func @nvvm_invalid_mma_7(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +func @nvvm_invalid_mma_7(%a0 : vector<2xf16>, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // expected-error@+1 {{op requires one result}} - %0:2 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> (!llvm.struct<(f32, f32, 
f32, f32, f32, f32, f32, f32)>, i32) + %0:2 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> (!llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>, i32) llvm.return %0#0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index ba0543e18cbc3d..545364dc07324d 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -60,11 +60,11 @@ func @nvvm_vote(%arg0 : i32, %arg1 : i1) -> i32 { llvm.return %0 : i32 } -func @nvvm_mma(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +func @nvvm_mma(%a0 : vector<2xf16>, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { - // CHECK: nvvm.mma.sync {{.*}} {alayout = "row", blayout = "col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + // CHECK: nvvm.mma.sync {{.*}} {alayout = "row", blayout = "col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> 
!llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir index c5314ab4d12873..31a56bede3bdd3 100644 --- a/mlir/test/Dialect/LLVMIR/rocdl.mlir +++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir @@ -36,133 +36,133 @@ func @rocdl.barrier() { } func @rocdl.xdlops(%arg0 : f32, %arg1 : f32, - %arg2 : !llvm.vec<32 x f32>, %arg3 : i32, - %arg4 : !llvm.vec<16 x f32>, %arg5 : !llvm.vec<4 x f32>, - %arg6 : !llvm.vec<4 x f16>, %arg7 : !llvm.vec<32 x i32>, - %arg8 : !llvm.vec<16 x i32>, %arg9 : !llvm.vec<4 x i32>, - %arg10 : !llvm.vec<2 x i16>) -> !llvm.vec<32 x f32> { + %arg2 : vector<32xf32>, %arg3 : i32, + %arg4 : vector<16xf32>, %arg5 : vector<4xf32>, + %arg6 : vector<4xf16>, %arg7 : vector<32xi32>, + %arg8 : vector<16xi32>, %arg9 : vector<4xi32>, + %arg10 : vector<2xi16>) -> vector<32xf32> { // CHECK-LABEL: rocdl.xdlops - // CHECK: rocdl.mfma.f32.32x32x1f32 {{.*}} : (f32, f32, !llvm.vec<32 x f32>, i32, i32, i32) -> !llvm.vec<32 x f32> + // CHECK: rocdl.mfma.f32.32x32x1f32 {{.*}} : (f32, f32, vector<32xf32>, i32, i32, i32) -> vector<32xf32> %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<32 x f32>, - i32, i32, i32) -> !llvm.vec<32 x f32> + (f32, f32, vector<32xf32>, + i32, i32, i32) -> vector<32xf32> - // CHECK: rocdl.mfma.f32.16x16x1f32 {{.*}} : (f32, f32, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32> + // CHECK: rocdl.mfma.f32.16x16x1f32 {{.*}} : (f32, f32, vector<16xf32>, i32, i32, i32) -> vector<16xf32> %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (f32, f32, vector<16xf32>, + i32, i32, i32) -> vector<16xf32> - // CHECK: rocdl.mfma.f32.16x16x4f32 {{.*}} : (f32, f32, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32> + // CHECK: 
rocdl.mfma.f32.16x16x4f32 {{.*}} : (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32> %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (f32, f32, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> - // CHECK: rocdl.mfma.f32.4x4x1f32 {{.*}} : (f32, f32, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32> + // CHECK: rocdl.mfma.f32.4x4x1f32 {{.*}} : (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32> %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (f32, f32, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> - // CHECK: rocdl.mfma.f32.32x32x2f32 {{.*}} : (f32, f32, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32> + // CHECK: rocdl.mfma.f32.32x32x2f32 {{.*}} : (f32, f32, vector<16xf32>, i32, i32, i32) -> vector<16xf32> %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (f32, f32, vector<16xf32>, + i32, i32, i32) -> vector<16xf32> - // CHECK: rocdl.mfma.f32.32x32x4f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<32 x f32>, i32, i32, i32) -> !llvm.vec<32 x f32> + // CHECK: rocdl.mfma.f32.32x32x4f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32> %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<32 x f32>, - i32, i32, i32) -> !llvm.vec<32 x f32> + (vector<4xf16>, vector<4xf16>, vector<32xf32>, + i32, i32, i32) -> vector<32xf32> - // CHECK: rocdl.mfma.f32.16x16x4f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32> + // CHECK: rocdl.mfma.f32.16x16x4f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> %r6 = 
rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (vector<4xf16>, vector<4xf16>, vector<16xf32>, + i32, i32, i32) -> vector<16xf32> - // CHECK: rocdl.mfma.f32.4x4x4f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32> + // CHECK: rocdl.mfma.f32.4x4x4f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (vector<4xf16>, vector<4xf16>, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> - // CHECK: rocdl.mfma.f32.32x32x8f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32> + // CHECK: rocdl.mfma.f32.32x32x8f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (vector<4xf16>, vector<4xf16>, vector<16xf32>, + i32, i32, i32) -> vector<16xf32> - // CHECK: rocdl.mfma.f32.16x16x16f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32> + // CHECK: rocdl.mfma.f32.16x16x16f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (vector<4xf16>, vector<4xf16>, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> - // CHECK: rocdl.mfma.i32.32x32x4i8 {{.*}} : (i32, i32, !llvm.vec<32 x i32>, i32, i32, i32) -> !llvm.vec<32 x i32> + // CHECK: 
rocdl.mfma.i32.32x32x4i8 {{.*}} : (i32, i32, vector<32xi32>, i32, i32, i32) -> vector<32xi32> %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<32 x i32>, - i32, i32, i32) -> !llvm.vec<32 x i32> + (i32, i32, vector<32xi32>, + i32, i32, i32) -> vector<32xi32> - // CHECK: rocdl.mfma.i32.16x16x4i8 {{.*}} : (i32, i32, !llvm.vec<16 x i32>, i32, i32, i32) -> !llvm.vec<16 x i32> + // CHECK: rocdl.mfma.i32.16x16x4i8 {{.*}} : (i32, i32, vector<16xi32>, i32, i32, i32) -> vector<16xi32> %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<16 x i32>, - i32, i32, i32) -> !llvm.vec<16 x i32> + (i32, i32, vector<16xi32>, + i32, i32, i32) -> vector<16xi32> - // CHECK: rocdl.mfma.i32.4x4x4i8 {{.*}} : (i32, i32, !llvm.vec<4 x i32>, i32, i32, i32) -> !llvm.vec<4 x i32> + // CHECK: rocdl.mfma.i32.4x4x4i8 {{.*}} : (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<4 x i32>, - i32, i32, i32) -> !llvm.vec<4 x i32> + (i32, i32, vector<4xi32>, + i32, i32, i32) -> vector<4xi32> - // CHECK: rocdl.mfma.i32.32x32x8i8 {{.*}} : (i32, i32, !llvm.vec<16 x i32>, i32, i32, i32) -> !llvm.vec<16 x i32> + // CHECK: rocdl.mfma.i32.32x32x8i8 {{.*}} : (i32, i32, vector<16xi32>, i32, i32, i32) -> vector<16xi32> %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<16 x i32>, - i32, i32, i32) -> !llvm.vec<16 x i32> + (i32, i32, vector<16xi32>, + i32, i32, i32) -> vector<16xi32> - // CHECK: rocdl.mfma.i32.16x16x16i8 {{.*}} : (i32, i32, !llvm.vec<4 x i32>, i32, i32, i32) -> !llvm.vec<4 x i32> + // CHECK: rocdl.mfma.i32.16x16x16i8 {{.*}} : (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<4 x i32>, - i32, i32, i32) -> !llvm.vec<4 x i32> + (i32, i32, vector<4xi32>, + 
i32, i32, i32) -> vector<4xi32> - // CHECK: rocdl.mfma.f32.32x32x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x f32>, i32, i32, i32) -> !llvm.vec<32 x f32> + // CHECK: rocdl.mfma.f32.32x32x2bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32> %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x f32>, - i32, i32, i32) -> !llvm.vec<32 x f32> + (vector<2xi16>, vector<2xi16>, vector<32xf32>, + i32, i32, i32) -> vector<32xf32> - // CHECK: rocdl.mfma.f32.16x16x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32> + // CHECK: rocdl.mfma.f32.16x16x2bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (vector<2xi16>, vector<2xi16>, vector<16xf32>, + i32, i32, i32) -> vector<16xf32> - // CHECK: rocdl.mfma.f32.4x4x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32> + // CHECK: rocdl.mfma.f32.4x4x2bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (vector<2xi16>, vector<2xi16>, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> - // CHECK: rocdl.mfma.f32.32x32x4bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32> + // CHECK: rocdl.mfma.f32.32x32x4bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 
: - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (vector<2xi16>, vector<2xi16>, vector<16xf32>, + i32, i32, i32) -> vector<16xf32> - // CHECK: rocdl.mfma.f32.16x16x8bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32> + // CHECK: rocdl.mfma.f32.16x16x8bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (vector<2xi16>, vector<2xi16>, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> - llvm.return %r0 : !llvm.vec<32 x f32> + llvm.return %r0 : vector<32xf32> } -llvm.func @rocdl.mubuf(%rsrc : !llvm.vec<4 x i32>, %vindex : i32, +llvm.func @rocdl.mubuf(%rsrc : vector<4xi32>, %vindex : i32, %offset : i32, %glc : i1, - %slc : i1, %vdata1 : !llvm.vec<1 x f32>, - %vdata2 : !llvm.vec<2 x f32>, %vdata4 : !llvm.vec<4 x f32>) { + %slc : i1, %vdata1 : vector<1xf32>, + %vdata2 : vector<2xf32>, %vdata4 : vector<4xf32>) { // CHECK-LABEL: rocdl.mubuf - // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<1 x f32> - %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x f32> - // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<2 x f32> - %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x f32> - // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<4 x f32> - %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x f32> - - // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<1 x f32> - rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x f32> - // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} 
%{{.*}} : !llvm.vec<2 x f32> - rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x f32> - // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<4 x f32> - rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x f32> + // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<1xf32> + %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32> + // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<2xf32> + %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32> + // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<4xf32> + %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32> + + // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<1xf32> + rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32> + // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<2xf32> + rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32> + // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<4xf32> + rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32> llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index ff970178ac9fc0..3b10fc51a2eb2b 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -223,21 +223,21 @@ llvm.func @foo(%arg0: i32) -> !llvm.struct<(i32, f64, i32)> { } // CHECK-LABEL: @casts -// CHECK-SAME: (%[[I32:.*]]: i32, %[[I64:.*]]: i64, %[[V4I32:.*]]: !llvm.vec<4 x i32>, %[[V4I64:.*]]: !llvm.vec<4 x i64>, %[[I32PTR:.*]]: !llvm.ptr) -func @casts(%arg0: i32, %arg1: i64, %arg2: !llvm.vec<4 x i32>, - %arg3: !llvm.vec<4 x i64>, %arg4: !llvm.ptr) { +// CHECK-SAME: 
(%[[I32:.*]]: i32, %[[I64:.*]]: i64, %[[V4I32:.*]]: vector<4xi32>, %[[V4I64:.*]]: vector<4xi64>, %[[I32PTR:.*]]: !llvm.ptr) +func @casts(%arg0: i32, %arg1: i64, %arg2: vector<4xi32>, + %arg3: vector<4xi64>, %arg4: !llvm.ptr) { // CHECK: = llvm.sext %[[I32]] : i32 to i56 %0 = llvm.sext %arg0 : i32 to i56 // CHECK: = llvm.zext %[[I32]] : i32 to i64 %1 = llvm.zext %arg0 : i32 to i64 // CHECK: = llvm.trunc %[[I64]] : i64 to i56 %2 = llvm.trunc %arg1 : i64 to i56 -// CHECK: = llvm.sext %[[V4I32]] : !llvm.vec<4 x i32> to !llvm.vec<4 x i56> - %3 = llvm.sext %arg2 : !llvm.vec<4 x i32> to !llvm.vec<4 x i56> -// CHECK: = llvm.zext %[[V4I32]] : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - %4 = llvm.zext %arg2 : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> -// CHECK: = llvm.trunc %[[V4I64]] : !llvm.vec<4 x i64> to !llvm.vec<4 x i56> - %5 = llvm.trunc %arg3 : !llvm.vec<4 x i64> to !llvm.vec<4 x i56> +// CHECK: = llvm.sext %[[V4I32]] : vector<4xi32> to vector<4xi56> + %3 = llvm.sext %arg2 : vector<4xi32> to vector<4xi56> +// CHECK: = llvm.zext %[[V4I32]] : vector<4xi32> to vector<4xi64> + %4 = llvm.zext %arg2 : vector<4xi32> to vector<4xi64> +// CHECK: = llvm.trunc %[[V4I64]] : vector<4xi64> to vector<4xi56> + %5 = llvm.trunc %arg3 : vector<4xi64> to vector<4xi56> // CHECK: = llvm.sitofp %[[I32]] : i32 to f32 %6 = llvm.sitofp %arg0 : i32 to f32 // CHECK: %[[FLOAT:.*]] = llvm.uitofp %[[I32]] : i32 to f32 @@ -252,15 +252,15 @@ func @casts(%arg0: i32, %arg1: i64, %arg2: !llvm.vec<4 x i32>, } // CHECK-LABEL: @vect -func @vect(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) { -// CHECK: = llvm.extractelement {{.*}} : !llvm.vec<4 x f32> - %0 = llvm.extractelement %arg0[%arg1 : i32] : !llvm.vec<4 x f32> -// CHECK: = llvm.insertelement {{.*}} : !llvm.vec<4 x f32> - %1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : !llvm.vec<4 x f32> -// CHECK: = llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32> - %2 = llvm.shufflevector 
%arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32> -// CHECK: = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm.vec<4 x f32> - %3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : !llvm.vec<4 x f32> +func @vect(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) { +// CHECK: = llvm.extractelement {{.*}} : vector<4xf32> + %0 = llvm.extractelement %arg0[%arg1 : i32] : vector<4xf32> +// CHECK: = llvm.insertelement {{.*}} : vector<4xf32> + %1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : vector<4xf32> +// CHECK: = llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : vector<4xf32>, vector<4xf32> + %2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : vector<4xf32>, vector<4xf32> +// CHECK: = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + %3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : vector<4xf32> return } diff --git a/mlir/test/Dialect/LLVMIR/types-invalid.mlir b/mlir/test/Dialect/LLVMIR/types-invalid.mlir index a2a6d6163dad37..d1c661dab51661 100644 --- a/mlir/test/Dialect/LLVMIR/types-invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/types-invalid.mlir @@ -113,42 +113,42 @@ func @identified_struct_with_void() { func @dynamic_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm.vec + "some.op"() : () -> !llvm.vec> } // ----- func @dynamic_scalable_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm.vec + "some.op"() : () -> !llvm.vec> } // ----- func @unscalable_vector() { // expected-error @+1 {{expected '? 
x x ' or ' x '}} - "some.op"() : () -> !llvm.vec<4 x 4 x i32> + "some.op"() : () -> !llvm.vec<4x4 x ptr> } // ----- func @zero_vector() { // expected-error @+1 {{the number of vector elements must be positive}} - "some.op"() : () -> !llvm.vec<0 x i32> + "some.op"() : () -> !llvm.vec<0 x ptr> } // ----- func @nested_vector() { // expected-error @+1 {{invalid vector element type}} - "some.op"() : () -> !llvm.vec<2 x vec<2 x i32>> + "some.op"() : () -> !llvm.vec<2 x vector<2xi32>> } // ----- func @scalable_void_vector() { // expected-error @+1 {{invalid vector element type}} - "some.op"() : () -> !llvm.vec + "some.op"() : () -> !llvm.vec } // ----- diff --git a/mlir/test/Dialect/LLVMIR/types.mlir b/mlir/test/Dialect/LLVMIR/types.mlir index 74e0e7e936330b..cc549d07b1b465 100644 --- a/mlir/test/Dialect/LLVMIR/types.mlir +++ b/mlir/test/Dialect/LLVMIR/types.mlir @@ -90,10 +90,10 @@ func @ptr() { // CHECK-LABEL: @vec func @vec() { - // CHECK: !llvm.vec<4 x i32> - "some.op"() : () -> !llvm.vec<4 x i32> - // CHECK: !llvm.vec<4 x f32> - "some.op"() : () -> !llvm.vec<4 x f32> + // CHECK: vector<4xi32> + "some.op"() : () -> vector<4xi32> + // CHECK: vector<4xf32> + "some.op"() : () -> vector<4xf32> // CHECK: !llvm.vec "some.op"() : () -> !llvm.vec // CHECK: !llvm.vec diff --git a/mlir/test/Target/arm-neon.mlir b/mlir/test/Target/arm-neon.mlir index 955b4aeb40a09d..8e24b5bad29573 100644 --- a/mlir/test/Target/arm-neon.mlir +++ b/mlir/test/Target/arm-neon.mlir @@ -1,25 +1,25 @@ // RUN: mlir-opt -verify-diagnostics %s | mlir-opt | mlir-translate -arm-neon-mlir-to-llvmir | FileCheck %s // CHECK-LABEL: arm_neon_smull -llvm.func @arm_neon_smull(%arg0: !llvm.vec<8 x i8>, %arg1: !llvm.vec<8 x i8>) -> !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)> { +llvm.func @arm_neon_smull(%arg0: vector<8xi8>, %arg1: vector<8xi8>) -> !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)> { // CHECK: %[[V0:.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %{{.*}}, <8 x i8> 
%{{.*}}) // CHECK-NEXT: %[[V00:.*]] = shufflevector <8 x i16> %3, <8 x i16> %[[V0]], <4 x i32> - %0 = "llvm_arm_neon.smull"(%arg0, %arg1) : (!llvm.vec<8 x i8>, !llvm.vec<8 x i8>) -> !llvm.vec<8 x i16> - %1 = llvm.shufflevector %0, %0 [3, 4, 5, 6] : !llvm.vec<8 x i16>, !llvm.vec<8 x i16> + %0 = "llvm_arm_neon.smull"(%arg0, %arg1) : (vector<8xi8>, vector<8xi8>) -> vector<8xi16> + %1 = llvm.shufflevector %0, %0 [3, 4, 5, 6] : vector<8xi16>, vector<8xi16> // CHECK-NEXT: %[[V1:.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %[[V00]], <4 x i16> %[[V00]]) // CHECK-NEXT: %[[V11:.*]] = shufflevector <4 x i32> %[[V1]], <4 x i32> %[[V1]], <2 x i32> - %2 = "llvm_arm_neon.smull"(%1, %1) : (!llvm.vec<4 x i16>, !llvm.vec<4 x i16>) -> !llvm.vec<4 x i32> - %3 = llvm.shufflevector %2, %2 [1, 2] : !llvm.vec<4 x i32>, !llvm.vec<4 x i32> + %2 = "llvm_arm_neon.smull"(%1, %1) : (vector<4xi16>, vector<4xi16>) -> vector<4xi32> + %3 = llvm.shufflevector %2, %2 [1, 2] : vector<4xi32>, vector<4xi32> // CHECK-NEXT: %[[V1:.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %[[V11]], <2 x i32> %[[V11]]) - %4 = "llvm_arm_neon.smull"(%3, %3) : (!llvm.vec<2 x i32>, !llvm.vec<2 x i32>) -> !llvm.vec<2 x i64> + %4 = "llvm_arm_neon.smull"(%3, %3) : (vector<2xi32>, vector<2xi32>) -> vector<2xi64> - %5 = llvm.mlir.undef : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)> - %6 = llvm.insertvalue %0, %5[0] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)> - %7 = llvm.insertvalue %2, %6[1] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)> - %8 = llvm.insertvalue %4, %7[2] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)> + %5 = llvm.mlir.undef : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)> + %6 = llvm.insertvalue %0, %5[0] : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)> + %7 = llvm.insertvalue %2, %6[1] : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)> + %8 = llvm.insertvalue %4, %7[2] : 
!llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)> // CHECK: ret { <8 x i16>, <4 x i32>, <2 x i64> } - llvm.return %8 : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)> + llvm.return %8 : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)> } diff --git a/mlir/test/Target/arm-sve.mlir b/mlir/test/Target/arm-sve.mlir index 430f60b4ecace6..f00992e05bfd55 100644 --- a/mlir/test/Target/arm-sve.mlir +++ b/mlir/test/Target/arm-sve.mlir @@ -1,51 +1,51 @@ // RUN: mlir-opt -verify-diagnostics %s | mlir-opt | mlir-translate --arm-sve-mlir-to-llvmir | FileCheck %s // CHECK-LABEL: define @arm_sve_sdot -llvm.func @arm_sve_sdot(%arg0: !llvm.vec, - %arg1: !llvm.vec, - %arg2: !llvm.vec) - -> !llvm.vec { +llvm.func @arm_sve_sdot(%arg0: !llvm.vec, + %arg1: !llvm.vec, + %arg2: !llvm.vec) + -> !llvm.vec { // CHECK: call @llvm.aarch64.sve.sdot.nxv4i32(, !llvm.vec, !llvm.vec) - -> !llvm.vec - llvm.return %0 : !llvm.vec + (!llvm.vec, !llvm.vec, !llvm.vec) + -> !llvm.vec + llvm.return %0 : !llvm.vec } // CHECK-LABEL: define @arm_sve_smmla -llvm.func @arm_sve_smmla(%arg0: !llvm.vec, - %arg1: !llvm.vec, - %arg2: !llvm.vec) - -> !llvm.vec { +llvm.func @arm_sve_smmla(%arg0: !llvm.vec, + %arg1: !llvm.vec, + %arg2: !llvm.vec) + -> !llvm.vec { // CHECK: call @llvm.aarch64.sve.smmla.nxv4i32(, !llvm.vec, !llvm.vec) - -> !llvm.vec - llvm.return %0 : !llvm.vec + (!llvm.vec, !llvm.vec, !llvm.vec) + -> !llvm.vec + llvm.return %0 : !llvm.vec } // CHECK-LABEL: define @arm_sve_udot -llvm.func @arm_sve_udot(%arg0: !llvm.vec, - %arg1: !llvm.vec, - %arg2: !llvm.vec) - -> !llvm.vec { +llvm.func @arm_sve_udot(%arg0: !llvm.vec, + %arg1: !llvm.vec, + %arg2: !llvm.vec) + -> !llvm.vec { // CHECK: call @llvm.aarch64.sve.udot.nxv4i32(, !llvm.vec, !llvm.vec) - -> !llvm.vec - llvm.return %0 : !llvm.vec + (!llvm.vec, !llvm.vec, !llvm.vec) + -> !llvm.vec + llvm.return %0 : !llvm.vec } // CHECK-LABEL: define @arm_sve_ummla -llvm.func @arm_sve_ummla(%arg0: !llvm.vec, - %arg1: !llvm.vec, - %arg2: 
!llvm.vec) - -> !llvm.vec { +llvm.func @arm_sve_ummla(%arg0: !llvm.vec, + %arg1: !llvm.vec, + %arg2: !llvm.vec) + -> !llvm.vec { // CHECK: call @llvm.aarch64.sve.ummla.nxv4i32(, !llvm.vec, !llvm.vec) - -> !llvm.vec - llvm.return %0 : !llvm.vec + (!llvm.vec, !llvm.vec, !llvm.vec) + -> !llvm.vec + llvm.return %0 : !llvm.vec } // CHECK-LABEL: define i64 @get_vector_scale() diff --git a/mlir/test/Target/avx512.mlir b/mlir/test/Target/avx512.mlir index c3259383601529..80164ca837d466 100644 --- a/mlir/test/Target/avx512.mlir +++ b/mlir/test/Target/avx512.mlir @@ -1,31 +1,31 @@ // RUN: mlir-opt -verify-diagnostics %s | mlir-opt | mlir-translate --avx512-mlir-to-llvmir | FileCheck %s // CHECK-LABEL: define <16 x float> @LLVM_x86_avx512_mask_ps_512 -llvm.func @LLVM_x86_avx512_mask_ps_512(%a: !llvm.vec<16 x f32>, +llvm.func @LLVM_x86_avx512_mask_ps_512(%a: vector<16 x f32>, %b: i32, %c: i16) - -> (!llvm.vec<16 x f32>) + -> (vector<16 x f32>) { // CHECK: call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %0 = "llvm_avx512.mask.rndscale.ps.512"(%a, %b, %a, %c, %b) : - (!llvm.vec<16 x f32>, i32, !llvm.vec<16 x f32>, i16, i32) -> !llvm.vec<16 x f32> + (vector<16 x f32>, i32, vector<16 x f32>, i16, i32) -> vector<16 x f32> // CHECK: call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %1 = "llvm_avx512.mask.scalef.ps.512"(%a, %a, %a, %c, %b) : - (!llvm.vec<16 x f32>, !llvm.vec<16 x f32>, !llvm.vec<16 x f32>, i16, i32) -> !llvm.vec<16 x f32> - llvm.return %1: !llvm.vec<16 x f32> + (vector<16 x f32>, vector<16 x f32>, vector<16 x f32>, i16, i32) -> vector<16 x f32> + llvm.return %1: vector<16 x f32> } // CHECK-LABEL: define <8 x double> @LLVM_x86_avx512_mask_pd_512 -llvm.func @LLVM_x86_avx512_mask_pd_512(%a: !llvm.vec<8 x f64>, +llvm.func @LLVM_x86_avx512_mask_pd_512(%a: vector<8xf64>, %b: i32, %c: i8) - -> (!llvm.vec<8 x f64>) + -> (vector<8xf64>) { // CHECK: call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %0 = 
"llvm_avx512.mask.rndscale.pd.512"(%a, %b, %a, %c, %b) : - (!llvm.vec<8 x f64>, i32, !llvm.vec<8 x f64>, i8, i32) -> !llvm.vec<8 x f64> + (vector<8xf64>, i32, vector<8xf64>, i8, i32) -> vector<8xf64> // CHECK: call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %1 = "llvm_avx512.mask.scalef.pd.512"(%a, %a, %a, %c, %b) : - (!llvm.vec<8 x f64>, !llvm.vec<8 x f64>, !llvm.vec<8 x f64>, i8, i32) -> !llvm.vec<8 x f64> - llvm.return %1: !llvm.vec<8 x f64> + (vector<8xf64>, vector<8xf64>, vector<8xf64>, i8, i32) -> vector<8xf64> + llvm.return %1: vector<8xf64> } diff --git a/mlir/test/Target/import.ll b/mlir/test/Target/import.ll index c7b9218fca102b..97e0c656c14e73 100644 --- a/mlir/test/Target/import.ll +++ b/mlir/test/Target/import.ll @@ -10,7 +10,7 @@ ; CHECK: llvm.mlir.global internal @g3("string") @g3 = internal global [6 x i8] c"string" -; CHECK: llvm.mlir.global external @g5() : !llvm.vec<8 x i32> +; CHECK: llvm.mlir.global external @g5() : vector<8xi32> @g5 = external global <8 x i32> @g4 = external global i32, align 8 @@ -53,7 +53,7 @@ ; Sequential constants. ; -; CHECK: llvm.mlir.global internal constant @vector_constant(dense<[1, 2]> : vector<2xi32>) : !llvm.vec<2 x i32> +; CHECK: llvm.mlir.global internal constant @vector_constant(dense<[1, 2]> : vector<2xi32>) : vector<2xi32> @vector_constant = internal constant <2 x i32> ; CHECK: llvm.mlir.global internal constant @array_constant(dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>) : !llvm.array<2 x f32> @array_constant = internal constant [2 x float] [float 1., float 2.] 
@@ -61,7 +61,7 @@ @nested_array_constant = internal constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]] ; CHECK: llvm.mlir.global internal constant @nested_array_constant3(dense<[{{\[}}[1, 2], [3, 4]]]> : tensor<1x2x2xi32>) : !llvm.array<1 x array<2 x array<2 x i32>>> @nested_array_constant3 = internal constant [1 x [2 x [2 x i32]]] [[2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]]] -; CHECK: llvm.mlir.global internal constant @nested_array_vector(dense<[{{\[}}[1, 2], [3, 4]]]> : vector<1x2x2xi32>) : !llvm.array<1 x array<2 x vec<2 x i32>>> +; CHECK: llvm.mlir.global internal constant @nested_array_vector(dense<[{{\[}}[1, 2], [3, 4]]]> : vector<1x2x2xi32>) : !llvm.array<1 x array<2 x vector<2xi32>>> @nested_array_vector = internal constant [1 x [2 x <2 x i32>]] [[2 x <2 x i32>] [<2 x i32> , <2 x i32> ]] ; diff --git a/mlir/test/Target/llvmir-intrinsics.mlir b/mlir/test/Target/llvmir-intrinsics.mlir index eedaa3e924f025..d218e35e774d64 100644 --- a/mlir/test/Target/llvmir-intrinsics.mlir +++ b/mlir/test/Target/llvmir-intrinsics.mlir @@ -1,285 +1,285 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s // CHECK-LABEL: @intrinsics -llvm.func @intrinsics(%arg0: f32, %arg1: f32, %arg2: !llvm.vec<8 x f32>, %arg3: !llvm.ptr) { +llvm.func @intrinsics(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: !llvm.ptr) { %c3 = llvm.mlir.constant(3 : i32) : i32 %c1 = llvm.mlir.constant(1 : i32) : i32 %c0 = llvm.mlir.constant(0 : i32) : i32 // CHECK: call float @llvm.fmuladd.f32 "llvm.intr.fmuladd"(%arg0, %arg1, %arg0) : (f32, f32, f32) -> f32 // CHECK: call <8 x float> @llvm.fmuladd.v8f32 - "llvm.intr.fmuladd"(%arg2, %arg2, %arg2) : (!llvm.vec<8 x f32>, !llvm.vec<8 x f32>, !llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.fmuladd"(%arg2, %arg2, %arg2) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> // CHECK: call float @llvm.fma.f32 "llvm.intr.fma"(%arg0, %arg1, %arg0) : (f32, f32, f32) -> f32 // CHECK: 
call <8 x float> @llvm.fma.v8f32 - "llvm.intr.fma"(%arg2, %arg2, %arg2) : (!llvm.vec<8 x f32>, !llvm.vec<8 x f32>, !llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.fma"(%arg2, %arg2, %arg2) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> // CHECK: call void @llvm.prefetch.p0i8(i8* %3, i32 0, i32 3, i32 1) "llvm.intr.prefetch"(%arg3, %c0, %c3, %c1) : (!llvm.ptr, i32, i32, i32) -> () llvm.return } // CHECK-LABEL: @exp_test -llvm.func @exp_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @exp_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.exp.f32 "llvm.intr.exp"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.exp.v8f32 - "llvm.intr.exp"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.exp"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @exp2_test -llvm.func @exp2_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @exp2_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.exp2.f32 "llvm.intr.exp2"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.exp2.v8f32 - "llvm.intr.exp2"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.exp2"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @log_test -llvm.func @log_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @log_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.log.f32 "llvm.intr.log"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.log.v8f32 - "llvm.intr.log"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.log"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @log10_test -llvm.func @log10_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @log10_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.log10.f32 "llvm.intr.log10"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.log10.v8f32 - "llvm.intr.log10"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + 
"llvm.intr.log10"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @log2_test -llvm.func @log2_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @log2_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.log2.f32 "llvm.intr.log2"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.log2.v8f32 - "llvm.intr.log2"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.log2"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @fabs_test -llvm.func @fabs_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @fabs_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.fabs.f32 "llvm.intr.fabs"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.fabs.v8f32 - "llvm.intr.fabs"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.fabs"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @sqrt_test -llvm.func @sqrt_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @sqrt_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.sqrt.f32 "llvm.intr.sqrt"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.sqrt.v8f32 - "llvm.intr.sqrt"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.sqrt"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @ceil_test -llvm.func @ceil_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @ceil_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.ceil.f32 "llvm.intr.ceil"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.ceil.v8f32 - "llvm.intr.ceil"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.ceil"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @floor_test -llvm.func @floor_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @floor_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.floor.f32 "llvm.intr.floor"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.floor.v8f32 
- "llvm.intr.floor"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.floor"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @cos_test -llvm.func @cos_test(%arg0: f32, %arg1: !llvm.vec<8 x f32>) { +llvm.func @cos_test(%arg0: f32, %arg1: vector<8xf32>) { // CHECK: call float @llvm.cos.f32 "llvm.intr.cos"(%arg0) : (f32) -> f32 // CHECK: call <8 x float> @llvm.cos.v8f32 - "llvm.intr.cos"(%arg1) : (!llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.cos"(%arg1) : (vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @copysign_test -llvm.func @copysign_test(%arg0: f32, %arg1: f32, %arg2: !llvm.vec<8 x f32>, %arg3: !llvm.vec<8 x f32>) { +llvm.func @copysign_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) { // CHECK: call float @llvm.copysign.f32 "llvm.intr.copysign"(%arg0, %arg1) : (f32, f32) -> f32 // CHECK: call <8 x float> @llvm.copysign.v8f32 - "llvm.intr.copysign"(%arg2, %arg3) : (!llvm.vec<8 x f32>, !llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.copysign"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @pow_test -llvm.func @pow_test(%arg0: f32, %arg1: f32, %arg2: !llvm.vec<8 x f32>, %arg3: !llvm.vec<8 x f32>) { +llvm.func @pow_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) { // CHECK: call float @llvm.pow.f32 "llvm.intr.pow"(%arg0, %arg1) : (f32, f32) -> f32 // CHECK: call <8 x float> @llvm.pow.v8f32 - "llvm.intr.pow"(%arg2, %arg3) : (!llvm.vec<8 x f32>, !llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.pow"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @bitreverse_test -llvm.func @bitreverse_test(%arg0: i32, %arg1: !llvm.vec<8 x i32>) { +llvm.func @bitreverse_test(%arg0: i32, %arg1: vector<8xi32>) { // CHECK: call i32 @llvm.bitreverse.i32 "llvm.intr.bitreverse"(%arg0) : (i32) -> i32 // CHECK: call <8 x i32> @llvm.bitreverse.v8i32 - 
"llvm.intr.bitreverse"(%arg1) : (!llvm.vec<8 x i32>) -> !llvm.vec<8 x i32> + "llvm.intr.bitreverse"(%arg1) : (vector<8xi32>) -> vector<8xi32> llvm.return } // CHECK-LABEL: @ctpop_test -llvm.func @ctpop_test(%arg0: i32, %arg1: !llvm.vec<8 x i32>) { +llvm.func @ctpop_test(%arg0: i32, %arg1: vector<8xi32>) { // CHECK: call i32 @llvm.ctpop.i32 "llvm.intr.ctpop"(%arg0) : (i32) -> i32 // CHECK: call <8 x i32> @llvm.ctpop.v8i32 - "llvm.intr.ctpop"(%arg1) : (!llvm.vec<8 x i32>) -> !llvm.vec<8 x i32> + "llvm.intr.ctpop"(%arg1) : (vector<8xi32>) -> vector<8xi32> llvm.return } // CHECK-LABEL: @maxnum_test -llvm.func @maxnum_test(%arg0: f32, %arg1: f32, %arg2: !llvm.vec<8 x f32>, %arg3: !llvm.vec<8 x f32>) { +llvm.func @maxnum_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) { // CHECK: call float @llvm.maxnum.f32 "llvm.intr.maxnum"(%arg0, %arg1) : (f32, f32) -> f32 // CHECK: call <8 x float> @llvm.maxnum.v8f32 - "llvm.intr.maxnum"(%arg2, %arg3) : (!llvm.vec<8 x f32>, !llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.maxnum"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @minnum_test -llvm.func @minnum_test(%arg0: f32, %arg1: f32, %arg2: !llvm.vec<8 x f32>, %arg3: !llvm.vec<8 x f32>) { +llvm.func @minnum_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) { // CHECK: call float @llvm.minnum.f32 "llvm.intr.minnum"(%arg0, %arg1) : (f32, f32) -> f32 // CHECK: call <8 x float> @llvm.minnum.v8f32 - "llvm.intr.minnum"(%arg2, %arg3) : (!llvm.vec<8 x f32>, !llvm.vec<8 x f32>) -> !llvm.vec<8 x f32> + "llvm.intr.minnum"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> llvm.return } // CHECK-LABEL: @smax_test -llvm.func @smax_test(%arg0: i32, %arg1: i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { +llvm.func @smax_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { // CHECK: call i32 @llvm.smax.i32 "llvm.intr.smax"(%arg0, %arg1) : (i32, i32) 
-> i32 // CHECK: call <8 x i32> @llvm.smax.v8i32 - "llvm.intr.smax"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.vec<8 x i32> + "llvm.intr.smax"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> llvm.return } // CHECK-LABEL: @smin_test -llvm.func @smin_test(%arg0: i32, %arg1: i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { +llvm.func @smin_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { // CHECK: call i32 @llvm.smin.i32 "llvm.intr.smin"(%arg0, %arg1) : (i32, i32) -> i32 // CHECK: call <8 x i32> @llvm.smin.v8i32 - "llvm.intr.smin"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.vec<8 x i32> + "llvm.intr.smin"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> llvm.return } // CHECK-LABEL: @vector_reductions -llvm.func @vector_reductions(%arg0: f32, %arg1: !llvm.vec<8 x f32>, %arg2: !llvm.vec<8 x i32>) { +llvm.func @vector_reductions(%arg0: f32, %arg1: vector<8xf32>, %arg2: vector<8xi32>) { // CHECK: call i32 @llvm.vector.reduce.add.v8i32 - "llvm.intr.vector.reduce.add"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.add"(%arg2) : (vector<8xi32>) -> i32 // CHECK: call i32 @llvm.vector.reduce.and.v8i32 - "llvm.intr.vector.reduce.and"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.and"(%arg2) : (vector<8xi32>) -> i32 // CHECK: call float @llvm.vector.reduce.fmax.v8f32 - "llvm.intr.vector.reduce.fmax"(%arg1) : (!llvm.vec<8 x f32>) -> f32 + "llvm.intr.vector.reduce.fmax"(%arg1) : (vector<8xf32>) -> f32 // CHECK: call float @llvm.vector.reduce.fmin.v8f32 - "llvm.intr.vector.reduce.fmin"(%arg1) : (!llvm.vec<8 x f32>) -> f32 + "llvm.intr.vector.reduce.fmin"(%arg1) : (vector<8xf32>) -> f32 // CHECK: call i32 @llvm.vector.reduce.mul.v8i32 - "llvm.intr.vector.reduce.mul"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.mul"(%arg2) : (vector<8xi32>) -> i32 // CHECK: call i32 @llvm.vector.reduce.or.v8i32 - 
"llvm.intr.vector.reduce.or"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.or"(%arg2) : (vector<8xi32>) -> i32 // CHECK: call i32 @llvm.vector.reduce.smax.v8i32 - "llvm.intr.vector.reduce.smax"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.smax"(%arg2) : (vector<8xi32>) -> i32 // CHECK: call i32 @llvm.vector.reduce.smin.v8i32 - "llvm.intr.vector.reduce.smin"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.smin"(%arg2) : (vector<8xi32>) -> i32 // CHECK: call i32 @llvm.vector.reduce.umax.v8i32 - "llvm.intr.vector.reduce.umax"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.umax"(%arg2) : (vector<8xi32>) -> i32 // CHECK: call i32 @llvm.vector.reduce.umin.v8i32 - "llvm.intr.vector.reduce.umin"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.umin"(%arg2) : (vector<8xi32>) -> i32 // CHECK: call float @llvm.vector.reduce.fadd.v8f32 - "llvm.intr.vector.reduce.fadd"(%arg0, %arg1) : (f32, !llvm.vec<8 x f32>) -> f32 + "llvm.intr.vector.reduce.fadd"(%arg0, %arg1) : (f32, vector<8xf32>) -> f32 // CHECK: call float @llvm.vector.reduce.fmul.v8f32 - "llvm.intr.vector.reduce.fmul"(%arg0, %arg1) : (f32, !llvm.vec<8 x f32>) -> f32 + "llvm.intr.vector.reduce.fmul"(%arg0, %arg1) : (f32, vector<8xf32>) -> f32 // CHECK: call reassoc float @llvm.vector.reduce.fadd.v8f32 - "llvm.intr.vector.reduce.fadd"(%arg0, %arg1) {reassoc = true} : (f32, !llvm.vec<8 x f32>) -> f32 + "llvm.intr.vector.reduce.fadd"(%arg0, %arg1) {reassoc = true} : (f32, vector<8xf32>) -> f32 // CHECK: call reassoc float @llvm.vector.reduce.fmul.v8f32 - "llvm.intr.vector.reduce.fmul"(%arg0, %arg1) {reassoc = true} : (f32, !llvm.vec<8 x f32>) -> f32 + "llvm.intr.vector.reduce.fmul"(%arg0, %arg1) {reassoc = true} : (f32, vector<8xf32>) -> f32 // CHECK: call i32 @llvm.vector.reduce.xor.v8i32 - "llvm.intr.vector.reduce.xor"(%arg2) : (!llvm.vec<8 x i32>) -> i32 + "llvm.intr.vector.reduce.xor"(%arg2) : (vector<8xi32>) -> i32 llvm.return } // 
CHECK-LABEL: @matrix_intrinsics // 4x16 16x3 -llvm.func @matrix_intrinsics(%A: !llvm.vec<64 x f32>, %B: !llvm.vec<48 x f32>, +llvm.func @matrix_intrinsics(%A: vector<64 x f32>, %B: vector<48 x f32>, %ptr: !llvm.ptr, %stride: i64) { // CHECK: call <12 x float> @llvm.matrix.multiply.v12f32.v64f32.v48f32(<64 x float> %0, <48 x float> %1, i32 4, i32 16, i32 3) %C = llvm.intr.matrix.multiply %A, %B { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32} : - (!llvm.vec<64 x f32>, !llvm.vec<48 x f32>) -> !llvm.vec<12 x f32> + (vector<64 x f32>, vector<48 x f32>) -> vector<12 x f32> // CHECK: call <48 x float> @llvm.matrix.transpose.v48f32(<48 x float> %1, i32 3, i32 16) %D = llvm.intr.matrix.transpose %B { rows = 3: i32, columns = 16: i32} : - !llvm.vec<48 x f32> into !llvm.vec<48 x f32> + vector<48 x f32> into vector<48 x f32> // CHECK: call <48 x float> @llvm.matrix.column.major.load.v48f32(float* align 4 %2, i64 %3, i1 false, i32 3, i32 16) %E = llvm.intr.matrix.column.major.load %ptr, { isVolatile = 0: i1, rows = 3: i32, columns = 16: i32} : - !llvm.vec<48 x f32> from !llvm.ptr stride i64 + vector<48 x f32> from !llvm.ptr stride i64 // CHECK: call void @llvm.matrix.column.major.store.v48f32(<48 x float> %7, float* align 4 %2, i64 %3, i1 false, i32 3, i32 16) llvm.intr.matrix.column.major.store %E, %ptr, { isVolatile = 0: i1, rows = 3: i32, columns = 16: i32} : - !llvm.vec<48 x f32> to !llvm.ptr stride i64 + vector<48 x f32> to !llvm.ptr stride i64 llvm.return } // CHECK-LABEL: @get_active_lane_mask -llvm.func @get_active_lane_mask(%base: i64, %n: i64) -> (!llvm.vec<7 x i1>) { +llvm.func @get_active_lane_mask(%base: i64, %n: i64) -> (vector<7xi1>) { // CHECK: call <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64 %0, i64 %1) - %0 = llvm.intr.get.active.lane.mask %base, %n : i64, i64 to !llvm.vec<7 x i1> - llvm.return %0 : !llvm.vec<7 x i1> + %0 = llvm.intr.get.active.lane.mask %base, %n : i64, i64 to vector<7xi1> + llvm.return %0 : vector<7xi1> } // 
CHECK-LABEL: @masked_load_store_intrinsics -llvm.func @masked_load_store_intrinsics(%A: !llvm.ptr>, %mask: !llvm.vec<7 x i1>) { +llvm.func @masked_load_store_intrinsics(%A: !llvm.ptr>, %mask: vector<7xi1>) { // CHECK: call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> undef) %a = llvm.intr.masked.load %A, %mask { alignment = 1: i32} : - (!llvm.ptr>, !llvm.vec<7 x i1>) -> !llvm.vec<7 x f32> + (!llvm.ptr>, vector<7xi1>) -> vector<7xf32> // CHECK: call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> %{{.*}}) %b = llvm.intr.masked.load %A, %mask, %a { alignment = 1: i32} : - (!llvm.ptr>, !llvm.vec<7 x i1>, !llvm.vec<7 x f32>) -> !llvm.vec<7 x f32> + (!llvm.ptr>, vector<7xi1>, vector<7xf32>) -> vector<7xf32> // CHECK: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> %{{.*}}, <7 x float>* %0, i32 {{.*}}, <7 x i1> %{{.*}}) llvm.intr.masked.store %b, %A, %mask { alignment = 1: i32} : - !llvm.vec<7 x f32>, !llvm.vec<7 x i1> into !llvm.ptr> + vector<7xf32>, vector<7xi1> into !llvm.ptr> llvm.return } // CHECK-LABEL: @masked_gather_scatter_intrinsics -llvm.func @masked_gather_scatter_intrinsics(%M: !llvm.vec<7 x ptr>, %mask: !llvm.vec<7 x i1>) { +llvm.func @masked_gather_scatter_intrinsics(%M: !llvm.vec<7 x ptr>, %mask: vector<7xi1>) { // CHECK: call <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*> %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> undef) %a = llvm.intr.masked.gather %M, %mask { alignment = 1: i32} : - (!llvm.vec<7 x ptr>, !llvm.vec<7 x i1>) -> !llvm.vec<7 x f32> + (!llvm.vec<7 x ptr>, vector<7xi1>) -> vector<7xf32> // CHECK: call <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*> %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> %{{.*}}) %b = llvm.intr.masked.gather %M, %mask, %a { alignment = 1: i32} : - (!llvm.vec<7 x ptr>, !llvm.vec<7 x i1>, !llvm.vec<7 x f32>) -> !llvm.vec<7 x f32> + (!llvm.vec<7 x ptr>, vector<7xi1>, 
vector<7xf32>) -> vector<7xf32> // CHECK: call void @llvm.masked.scatter.v7f32.v7p0f32(<7 x float> %{{.*}}, <7 x float*> %{{.*}}, i32 1, <7 x i1> %{{.*}}) llvm.intr.masked.scatter %b, %M, %mask { alignment = 1: i32} : - !llvm.vec<7 x f32>, !llvm.vec<7 x i1> into !llvm.vec<7 x ptr> + vector<7xf32>, vector<7xi1> into !llvm.vec<7 x ptr> llvm.return } // CHECK-LABEL: @masked_expand_compress_intrinsics -llvm.func @masked_expand_compress_intrinsics(%ptr: !llvm.ptr, %mask: !llvm.vec<7 x i1>, %passthru: !llvm.vec<7 x f32>) { +llvm.func @masked_expand_compress_intrinsics(%ptr: !llvm.ptr, %mask: vector<7xi1>, %passthru: vector<7xf32>) { // CHECK: call <7 x float> @llvm.masked.expandload.v7f32(float* %{{.*}}, <7 x i1> %{{.*}}, <7 x float> %{{.*}}) %0 = "llvm.intr.masked.expandload"(%ptr, %mask, %passthru) - : (!llvm.ptr, !llvm.vec<7 x i1>, !llvm.vec<7 x f32>) -> (!llvm.vec<7 x f32>) + : (!llvm.ptr, vector<7xi1>, vector<7xf32>) -> (vector<7xf32>) // CHECK: call void @llvm.masked.compressstore.v7f32(<7 x float> %{{.*}}, float* %{{.*}}, <7 x i1> %{{.*}}) "llvm.intr.masked.compressstore"(%0, %ptr, %mask) - : (!llvm.vec<7 x f32>, !llvm.ptr, !llvm.vec<7 x i1>) -> () + : (vector<7xf32>, !llvm.ptr, vector<7xi1>) -> () llvm.return } @@ -294,56 +294,56 @@ llvm.func @memcpy_test(%arg0: i32, %arg1: i1, %arg2: !llvm.ptr, %arg3: !llvm } // CHECK-LABEL: @sadd_with_overflow_test -llvm.func @sadd_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { +llvm.func @sadd_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { // CHECK: call { i32, i1 } @llvm.sadd.with.overflow.i32 "llvm.intr.sadd.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> // CHECK: call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32 - "llvm.intr.sadd.with.overflow"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.struct<(vec<8 x i32>, vec<8 x i1>)> + "llvm.intr.sadd.with.overflow"(%arg2, %arg3) : 
(vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> llvm.return } // CHECK-LABEL: @uadd_with_overflow_test -llvm.func @uadd_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { +llvm.func @uadd_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { // CHECK: call { i32, i1 } @llvm.uadd.with.overflow.i32 "llvm.intr.uadd.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> // CHECK: call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32 - "llvm.intr.uadd.with.overflow"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.struct<(vec<8 x i32>, vec<8 x i1>)> + "llvm.intr.uadd.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> llvm.return } // CHECK-LABEL: @ssub_with_overflow_test -llvm.func @ssub_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { +llvm.func @ssub_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { // CHECK: call { i32, i1 } @llvm.ssub.with.overflow.i32 "llvm.intr.ssub.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> // CHECK: call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32 - "llvm.intr.ssub.with.overflow"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.struct<(vec<8 x i32>, vec<8 x i1>)> + "llvm.intr.ssub.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> llvm.return } // CHECK-LABEL: @usub_with_overflow_test -llvm.func @usub_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { +llvm.func @usub_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { // CHECK: call { i32, i1 } @llvm.usub.with.overflow.i32 "llvm.intr.usub.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> // 
CHECK: call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32 - "llvm.intr.usub.with.overflow"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.struct<(vec<8 x i32>, vec<8 x i1>)> + "llvm.intr.usub.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> llvm.return } // CHECK-LABEL: @smul_with_overflow_test -llvm.func @smul_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { +llvm.func @smul_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { // CHECK: call { i32, i1 } @llvm.smul.with.overflow.i32 "llvm.intr.smul.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> // CHECK: call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32 - "llvm.intr.smul.with.overflow"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.struct<(vec<8 x i32>, vec<8 x i1>)> + "llvm.intr.smul.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> llvm.return } // CHECK-LABEL: @umul_with_overflow_test -llvm.func @umul_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { +llvm.func @umul_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { // CHECK: call { i32, i1 } @llvm.umul.with.overflow.i32 "llvm.intr.umul.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> // CHECK: call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32 - "llvm.intr.umul.with.overflow"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.struct<(vec<8 x i32>, vec<8 x i1>)> + "llvm.intr.umul.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> llvm.return } diff --git a/mlir/test/Target/llvmir-types.mlir b/mlir/test/Target/llvmir-types.mlir index 5ea83549f04160..a4c4f201f48772 100644 --- 
a/mlir/test/Target/llvmir-types.mlir +++ b/mlir/test/Target/llvmir-types.mlir @@ -87,15 +87,15 @@ llvm.func @return_ppi8_42_9() -> !llvm.ptr, 9> // // CHECK: declare <4 x i32> @return_v4_i32() -llvm.func @return_v4_i32() -> !llvm.vec<4 x i32> +llvm.func @return_v4_i32() -> vector<4xi32> // CHECK: declare <4 x float> @return_v4_float() -llvm.func @return_v4_float() -> !llvm.vec<4 x f32> +llvm.func @return_v4_float() -> vector<4xf32> // CHECK: declare @return_vs_4_i32() -llvm.func @return_vs_4_i32() -> !llvm.vec +llvm.func @return_vs_4_i32() -> !llvm.vec // CHECK: declare @return_vs_8_half() -llvm.func @return_vs_8_half() -> !llvm.vec +llvm.func @return_vs_8_half() -> !llvm.vec // CHECK: declare <4 x i8*> @return_v_4_pi8() -llvm.func @return_v_4_pi8() -> !llvm.vec<4 x ptr> +llvm.func @return_v_4_pi8() -> !llvm.vec<4xptr> // // Arrays. diff --git a/mlir/test/Target/llvmir.mlir b/mlir/test/Target/llvmir.mlir index 5a686bfdee6e44..4645ef96a9d032 100644 --- a/mlir/test/Target/llvmir.mlir +++ b/mlir/test/Target/llvmir.mlir @@ -782,66 +782,66 @@ llvm.func @multireturn_caller() { } // CHECK-LABEL: define <4 x float> @vector_ops(<4 x float> {{%.*}}, <4 x i1> {{%.*}}, <4 x i64> {{%.*}}) -llvm.func @vector_ops(%arg0: !llvm.vec<4 x f32>, %arg1: !llvm.vec<4 x i1>, %arg2: !llvm.vec<4 x i64>) -> !llvm.vec<4 x f32> { - %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : !llvm.vec<4 x f32> +llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>) -> vector<4xf32> { + %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : vector<4xf32> // CHECK-NEXT: %4 = fadd <4 x float> %0, - %1 = llvm.fadd %arg0, %0 : !llvm.vec<4 x f32> + %1 = llvm.fadd %arg0, %0 : vector<4xf32> // CHECK-NEXT: %5 = select <4 x i1> %1, <4 x float> %4, <4 x float> %0 - %2 = llvm.select %arg1, %1, %arg0 : !llvm.vec<4 x i1>, !llvm.vec<4 x f32> + %2 = llvm.select %arg1, %1, %arg0 : vector<4xi1>, vector<4xf32> // CHECK-NEXT: %6 = sdiv <4 x i64> %2, %2 - %3 = llvm.sdiv 
%arg2, %arg2 : !llvm.vec<4 x i64> + %3 = llvm.sdiv %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %7 = udiv <4 x i64> %2, %2 - %4 = llvm.udiv %arg2, %arg2 : !llvm.vec<4 x i64> + %4 = llvm.udiv %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %8 = srem <4 x i64> %2, %2 - %5 = llvm.srem %arg2, %arg2 : !llvm.vec<4 x i64> + %5 = llvm.srem %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %9 = urem <4 x i64> %2, %2 - %6 = llvm.urem %arg2, %arg2 : !llvm.vec<4 x i64> + %6 = llvm.urem %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %10 = fdiv <4 x float> %0, - %7 = llvm.fdiv %arg0, %0 : !llvm.vec<4 x f32> + %7 = llvm.fdiv %arg0, %0 : vector<4xf32> // CHECK-NEXT: %11 = frem <4 x float> %0, - %8 = llvm.frem %arg0, %0 : !llvm.vec<4 x f32> + %8 = llvm.frem %arg0, %0 : vector<4xf32> // CHECK-NEXT: %12 = and <4 x i64> %2, %2 - %9 = llvm.and %arg2, %arg2 : !llvm.vec<4 x i64> + %9 = llvm.and %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %13 = or <4 x i64> %2, %2 - %10 = llvm.or %arg2, %arg2 : !llvm.vec<4 x i64> + %10 = llvm.or %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %14 = xor <4 x i64> %2, %2 - %11 = llvm.xor %arg2, %arg2 : !llvm.vec<4 x i64> + %11 = llvm.xor %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %15 = shl <4 x i64> %2, %2 - %12 = llvm.shl %arg2, %arg2 : !llvm.vec<4 x i64> + %12 = llvm.shl %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %16 = lshr <4 x i64> %2, %2 - %13 = llvm.lshr %arg2, %arg2 : !llvm.vec<4 x i64> + %13 = llvm.lshr %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %17 = ashr <4 x i64> %2, %2 - %14 = llvm.ashr %arg2, %arg2 : !llvm.vec<4 x i64> + %14 = llvm.ashr %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: ret <4 x float> %4 - llvm.return %1 : !llvm.vec<4 x f32> + llvm.return %1 : vector<4xf32> } // CHECK-LABEL: @vector_splat_1d -llvm.func @vector_splat_1d() -> !llvm.vec<4 x f32> { +llvm.func @vector_splat_1d() -> vector<4xf32> { // CHECK: ret <4 x float> zeroinitializer - %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4xf32>) : !llvm.vec<4 x f32> - llvm.return %0 : !llvm.vec<4 x 
f32> + %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4xf32>) : vector<4xf32> + llvm.return %0 : vector<4xf32> } // CHECK-LABEL: @vector_splat_2d -llvm.func @vector_splat_2d() -> !llvm.array<4 x vec<16 x f32>> { +llvm.func @vector_splat_2d() -> !llvm.array<4 x vector<16 x f32>> { // CHECK: ret [4 x <16 x float>] zeroinitializer - %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16xf32>) : !llvm.array<4 x vec<16 x f32>> - llvm.return %0 : !llvm.array<4 x vec<16 x f32>> + %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16xf32>) : !llvm.array<4 x vector<16 x f32>> + llvm.return %0 : !llvm.array<4 x vector<16 x f32>> } // CHECK-LABEL: @vector_splat_3d -llvm.func @vector_splat_3d() -> !llvm.array<4 x array<16 x vec<4 x f32>>> { +llvm.func @vector_splat_3d() -> !llvm.array<4 x array<16 x vector<4 x f32>>> { // CHECK: ret [4 x [16 x <4 x float>]] zeroinitializer - %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16x4xf32>) : !llvm.array<4 x array<16 x vec<4 x f32>>> - llvm.return %0 : !llvm.array<4 x array<16 x vec<4 x f32>>> + %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16x4xf32>) : !llvm.array<4 x array<16 x vector<4 x f32>>> + llvm.return %0 : !llvm.array<4 x array<16 x vector<4 x f32>>> } // CHECK-LABEL: @vector_splat_nonzero -llvm.func @vector_splat_nonzero() -> !llvm.vec<4 x f32> { +llvm.func @vector_splat_nonzero() -> vector<4xf32> { // CHECK: ret <4 x float> - %0 = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm.vec<4 x f32> - llvm.return %0 : !llvm.vec<4 x f32> + %0 = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + llvm.return %0 : vector<4xf32> } // CHECK-LABEL: @ops @@ -1019,22 +1019,22 @@ llvm.func @fcmp(%arg0: f32, %arg1: f32) { } // CHECK-LABEL: @vect -llvm.func @vect(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) { +llvm.func @vect(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) { // CHECK-NEXT: extractelement <4 x float> {{.*}}, i32 // CHECK-NEXT: insertelement <4 x float> 
{{.*}}, float %2, i32 // CHECK-NEXT: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <5 x i32> - %0 = llvm.extractelement %arg0[%arg1 : i32] : !llvm.vec<4 x f32> - %1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : !llvm.vec<4 x f32> - %2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32> + %0 = llvm.extractelement %arg0[%arg1 : i32] : vector<4xf32> + %1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : vector<4xf32> + %2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : vector<4xf32>, vector<4xf32> llvm.return } // CHECK-LABEL: @vect_i64idx -llvm.func @vect_i64idx(%arg0: !llvm.vec<4 x f32>, %arg1: i64, %arg2: f32) { +llvm.func @vect_i64idx(%arg0: vector<4xf32>, %arg1: i64, %arg2: f32) { // CHECK-NEXT: extractelement <4 x float> {{.*}}, i64 // CHECK-NEXT: insertelement <4 x float> {{.*}}, float %2, i64 - %0 = llvm.extractelement %arg0[%arg1 : i64] : !llvm.vec<4 x f32> - %1 = llvm.insertelement %arg2, %arg0[%arg1 : i64] : !llvm.vec<4 x f32> + %0 = llvm.extractelement %arg0[%arg1 : i64] : vector<4xf32> + %1 = llvm.insertelement %arg2, %arg0[%arg1 : i64] : vector<4xf32> llvm.return } @@ -1050,10 +1050,10 @@ llvm.func @alloca(%size : i64) { } // CHECK-LABEL: @constants -llvm.func @constants() -> !llvm.vec<4 x f32> { +llvm.func @constants() -> vector<4xf32> { // CHECK: ret <4 x float> - %0 = llvm.mlir.constant(sparse<[[0]], [4.2e+01]> : vector<4xf32>) : !llvm.vec<4 x f32> - llvm.return %0 : !llvm.vec<4 x f32> + %0 = llvm.mlir.constant(sparse<[[0]], [4.2e+01]> : vector<4xf32>) : vector<4xf32> + llvm.return %0 : vector<4xf32> } // CHECK-LABEL: @fp_casts @@ -1088,12 +1088,12 @@ llvm.func @null() -> !llvm.ptr { // Check that dense elements attributes are exported properly in constants. 
// CHECK-LABEL: @elements_constant_3d_vector -llvm.func @elements_constant_3d_vector() -> !llvm.array<2 x array<2 x vec<2 x i32>>> { +llvm.func @elements_constant_3d_vector() -> !llvm.array<2 x array<2 x vector<2 x i32>>> { // CHECK: ret [2 x [2 x <2 x i32>]] // CHECK-SAME: {{\[}}[2 x <2 x i32>] [<2 x i32> , <2 x i32> ], // CHECK-SAME: [2 x <2 x i32>] [<2 x i32> , <2 x i32> ]] - %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : vector<2x2x2xi32>) : !llvm.array<2 x array<2 x vec<2 x i32>>> - llvm.return %0 : !llvm.array<2 x array<2 x vec<2 x i32>>> + %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : vector<2x2x2xi32>) : !llvm.array<2 x array<2 x vector<2 x i32>>> + llvm.return %0 : !llvm.array<2 x array<2 x vector<2 x i32>>> } // CHECK-LABEL: @elements_constant_3d_array diff --git a/mlir/test/Target/nvvmir.mlir b/mlir/test/Target/nvvmir.mlir index 63dd200be9d297..08aaa07b12a282 100644 --- a/mlir/test/Target/nvvmir.mlir +++ b/mlir/test/Target/nvvmir.mlir @@ -64,12 +64,12 @@ llvm.func @nvvm_vote(%0 : i32, %1 : i1) -> i32 { llvm.return %3 : i32 } -llvm.func @nvvm_mma(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>, - %b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>, +llvm.func @nvvm_mma(%a0 : vector<2xf16>, %a1 : vector<2xf16>, + %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // CHECK: call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32 - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, 
vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } diff --git a/mlir/test/Target/rocdl.mlir b/mlir/test/Target/rocdl.mlir index b93c5f53e6ab11..1f4b8b03c81b82 100644 --- a/mlir/test/Target/rocdl.mlir +++ b/mlir/test/Target/rocdl.mlir @@ -43,133 +43,133 @@ llvm.func @rocdl.barrier() { } llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32, - %arg2 : !llvm.vec<32 x f32>, %arg3 : i32, - %arg4 : !llvm.vec<16 x f32>, %arg5 : !llvm.vec<4 x f32>, - %arg6 : !llvm.vec<4 x f16>, %arg7 : !llvm.vec<32 x i32>, - %arg8 : !llvm.vec<16 x i32>, %arg9 : !llvm.vec<4 x i32>, - %arg10 : !llvm.vec<2 x i16>) -> !llvm.vec<32 x f32> { + %arg2 : vector<32 x f32>, %arg3 : i32, + %arg4 : vector<16 x f32>, %arg5 : vector<4xf32>, + %arg6 : vector<4xf16>, %arg7 : vector<32 x i32>, + %arg8 : vector<16 x i32>, %arg9 : vector<4xi32>, + %arg10 : vector<2xi16>) -> vector<32 x f32> { // CHECK-LABEL: rocdl.xdlops // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<32 x f32>, - i32, i32, i32) -> !llvm.vec<32 x f32> + (f32, f32, vector<32 x f32>, + i32, i32, i32) -> vector<32 x f32> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (f32, f32, vector<16 x f32>, + i32, i32, i32) -> vector<16 x f32> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : - (f32, f32, 
!llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (f32, f32, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (f32, f32, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : - (f32, f32, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (f32, f32, vector<16 x f32>, + i32, i32, i32) -> vector<16 x f32> // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<32 x f32>, - i32, i32, i32) -> !llvm.vec<32 x f32> + (vector<4xf16>, vector<4xf16>, vector<32 x f32>, + i32, i32, i32) -> vector<32 x f32> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (vector<4xf16>, vector<4xf16>, vector<16 x f32>, + i32, i32, i32) -> vector<16 x f32> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>, - 
i32, i32, i32) -> !llvm.vec<4 x f32> + (vector<4xf16>, vector<4xf16>, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (vector<4xf16>, vector<4xf16>, vector<16 x f32>, + i32, i32, i32) -> vector<16 x f32> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : - (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (vector<4xf16>, vector<4xf16>, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<32 x i32>, - i32, i32, i32) -> !llvm.vec<32 x i32> + (i32, i32, vector<32 x i32>, + i32, i32, i32) -> vector<32 x i32> // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<16 x i32>, - i32, i32, i32) -> !llvm.vec<16 x i32> + (i32, i32, vector<16 x i32>, + i32, i32, i32) -> vector<16 x i32> // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<4 x i32>, - i32, i32, i32) -> !llvm.vec<4 x i32> + (i32, i32, 
vector<4xi32>, + i32, i32, i32) -> vector<4xi32> // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<16 x i32>, - i32, i32, i32) -> !llvm.vec<16 x i32> + (i32, i32, vector<16 x i32>, + i32, i32, i32) -> vector<16 x i32> // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : - (i32, i32, !llvm.vec<4 x i32>, - i32, i32, i32) -> !llvm.vec<4 x i32> + (i32, i32, vector<4xi32>, + i32, i32, i32) -> vector<4xi32> // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x f32>, - i32, i32, i32) -> !llvm.vec<32 x f32> + (vector<2xi16>, vector<2xi16>, vector<32 x f32>, + i32, i32, i32) -> vector<32 x f32> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (vector<2xi16>, vector<2xi16>, vector<16 x f32>, + i32, i32, i32) -> vector<16 x f32> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (vector<2xi16>, vector<2xi16>, 
vector<4xf32>, + i32, i32, i32) -> vector<4xf32> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>, - i32, i32, i32) -> !llvm.vec<16 x f32> + (vector<2xi16>, vector<2xi16>, vector<16 x f32>, + i32, i32, i32) -> vector<16 x f32> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : - (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>, - i32, i32, i32) -> !llvm.vec<4 x f32> + (vector<2xi16>, vector<2xi16>, vector<4xf32>, + i32, i32, i32) -> vector<4xf32> - llvm.return %r0 : !llvm.vec<32 x f32> + llvm.return %r0 : vector<32 x f32> } -llvm.func @rocdl.mubuf(%rsrc : !llvm.vec<4 x i32>, %vindex : i32, +llvm.func @rocdl.mubuf(%rsrc : vector<4xi32>, %vindex : i32, %offset : i32, %glc : i1, - %slc : i1, %vdata1 : !llvm.vec<1 x f32>, - %vdata2 : !llvm.vec<2 x f32>, %vdata4 : !llvm.vec<4 x f32>) { + %slc : i1, %vdata1 : vector<1xf32>, + %vdata2 : vector<2xf32>, %vdata4 : vector<4xf32>) { // CHECK-LABEL: rocdl.mubuf // CHECK: call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) - %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x f32> + %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32> // CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) - %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x f32> + %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32> // CHECK: call <4 x float> 
@llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) - %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x f32> + %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32> // CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) - rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x f32> + rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32> // CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) - rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x f32> + rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32> // CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) - rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x f32> + rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32> llvm.return } From e8287cb2b2923af9da72fd953e2ec5495c33861a Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 12 Jan 2021 16:04:12 +0700 Subject: [PATCH 04/86] [Test] Add failing test for PR48725 --- .../Transforms/LoopStrengthReduce/pr48725.ll | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/pr48725.ll diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll b/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll new file mode 100644 index 00000000000000..ef25b92ffd1c0b --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll @@ -0,0 +1,102 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s +; XFAIL: * + +source_filename = "./simple.ll" +target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: test +define void @test() { +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp = phi i32 [ undef, %bb ], [ %tmp87, %bb1 ] + %tmp2 = phi i32 [ undef, %bb ], [ %tmp86, %bb1 ] + %tmp3 = mul i32 %tmp, undef + %tmp4 = xor i32 %tmp3, -1 + %tmp5 = add i32 %tmp, %tmp4 + %tmp6 = add i32 %tmp2, -1 + %tmp7 = add i32 %tmp5, %tmp6 + %tmp8 = mul i32 %tmp7, %tmp3 + %tmp9 = xor i32 %tmp8, -1 + %tmp10 = add i32 %tmp7, %tmp9 + %tmp11 = add i32 %tmp10, undef + %tmp12 = mul i32 %tmp11, %tmp8 + %tmp13 = xor i32 %tmp12, -1 + %tmp14 = add i32 %tmp11, %tmp13 + %tmp15 = add i32 %tmp14, undef + %tmp16 = mul i32 %tmp15, %tmp12 + %tmp17 = add i32 %tmp15, undef + %tmp18 = add i32 %tmp17, undef + %tmp19 = mul i32 %tmp18, %tmp16 + %tmp20 = xor i32 %tmp19, -1 + %tmp21 = add i32 %tmp18, %tmp20 + %tmp22 = add i32 %tmp21, undef + %tmp23 = mul i32 %tmp22, %tmp19 + %tmp24 = xor i32 %tmp23, -1 + %tmp25 = add i32 %tmp22, %tmp24 + %tmp26 = add i32 %tmp25, undef + %tmp27 = mul i32 %tmp26, %tmp23 + %tmp28 = xor i32 %tmp27, -1 + %tmp29 = add i32 %tmp26, %tmp28 + %tmp30 = add i32 %tmp29, undef + %tmp31 = mul i32 %tmp30, %tmp27 + %tmp32 = xor i32 %tmp31, -1 + %tmp33 = add i32 %tmp30, %tmp32 + %tmp34 = add i32 %tmp33, undef + %tmp35 = mul i32 %tmp34, %tmp31 + %tmp36 = xor i32 %tmp35, -1 + %tmp37 = add i32 %tmp34, %tmp36 + %tmp38 = add i32 %tmp2, -9 + %tmp39 = add i32 %tmp37, %tmp38 + %tmp40 = mul i32 %tmp39, %tmp35 + %tmp41 = xor i32 %tmp40, -1 + %tmp42 = add i32 %tmp39, %tmp41 + %tmp43 = add i32 %tmp42, undef + %tmp44 = mul i32 %tmp43, %tmp40 + %tmp45 = xor i32 %tmp44, -1 + %tmp46 = add i32 %tmp43, %tmp45 + %tmp47 = add i32 %tmp46, undef + %tmp48 = mul i32 %tmp47, %tmp44 + %tmp49 = xor i32 %tmp48, -1 + %tmp50 = add i32 %tmp47, %tmp49 + %tmp51 = add i32 %tmp50, undef + %tmp52 = mul i32 %tmp51, %tmp48 + %tmp53 = xor i32 %tmp52, -1 + %tmp54 = add i32 
%tmp51, %tmp53 + %tmp55 = add i32 %tmp54, undef + %tmp56 = mul i32 %tmp55, %tmp52 + %tmp57 = xor i32 %tmp56, -1 + %tmp58 = add i32 %tmp55, %tmp57 + %tmp59 = add i32 %tmp2, -14 + %tmp60 = add i32 %tmp58, %tmp59 + %tmp61 = mul i32 %tmp60, %tmp56 + %tmp62 = xor i32 %tmp61, -1 + %tmp63 = add i32 %tmp60, %tmp62 + %tmp64 = add i32 %tmp63, undef + %tmp65 = mul i32 %tmp64, %tmp61 + %tmp66 = xor i32 %tmp65, -1 + %tmp67 = add i32 %tmp64, %tmp66 + %tmp68 = add i32 %tmp67, undef + %tmp69 = mul i32 %tmp68, %tmp65 + %tmp70 = xor i32 %tmp69, -1 + %tmp71 = add i32 %tmp68, %tmp70 + %tmp72 = add i32 %tmp71, undef + %tmp73 = mul i32 %tmp72, %tmp69 + %tmp74 = xor i32 %tmp73, -1 + %tmp75 = add i32 %tmp72, %tmp74 + %tmp76 = add i32 %tmp75, undef + %tmp77 = mul i32 %tmp76, %tmp73 + %tmp78 = xor i32 %tmp77, -1 + %tmp79 = add i32 %tmp76, %tmp78 + %tmp80 = add i32 %tmp79, undef + %tmp81 = mul i32 %tmp80, %tmp77 + %tmp82 = xor i32 %tmp81, -1 + %tmp83 = add i32 %tmp80, %tmp82 + %tmp84 = add i32 %tmp83, undef + %tmp85 = add i32 %tmp84, undef + %tmp86 = add i32 %tmp2, -21 + %tmp87 = add i32 %tmp85, %tmp86 + br label %bb1 +} From c93b95593907c28cbcfde3d8266801587e110b42 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 28 Dec 2020 19:48:44 -0800 Subject: [PATCH 05/86] [WebAssembly] Remove more unnecessary brs in CFGStackify After placing markers, we removed some unnecessary branches, but it only handled the simplest case. This makes more unnecessary branches to be removed. 
Reviewed By: dschuff, tlively Differential Revision: https://reviews.llvm.org/D94047 --- .../WebAssembly/WebAssemblyCFGStackify.cpp | 34 ++++++- .../CodeGen/WebAssembly/cfg-stackify-eh.ll | 95 ++++++++++++++++++- 2 files changed, 121 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 9a6d8df8bdca66..a8e0c3efea0e24 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -637,11 +637,32 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { // try // ... // br bb2 <- Not necessary - // bb1: + // bb1 (ehpad): // catch // ... - // bb2: + // bb2: <- Continuation BB // end + // + // A more involved case: When the BB where 'end' is located is an another EH + // pad, the Cont (= continuation) BB is that EH pad's 'end' BB. For example, + // bb0: + // try + // try + // ... + // br bb3 <- Not necessary + // bb1 (ehpad): + // catch + // bb2 (ehpad): + // end + // catch + // ... + // bb3: <- Continuation BB + // end + // + // When the EH pad at hand is bb1, its matching end_try is in bb2. But it is + // another EH pad, so bb0's continuation BB becomes bb3. So 'br bb3' in the + // code can be deleted. This is why we run 'while' until 'Cont' is not an EH + // pad. 
for (auto &MBB : MF) { if (!MBB.isEHPad()) continue; @@ -649,7 +670,14 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; MachineBasicBlock *EHPadLayoutPred = MBB.getPrevNode(); - MachineBasicBlock *Cont = BeginToEnd[EHPadToTry[&MBB]]->getParent(); + + MachineBasicBlock *Cont = &MBB; + while (Cont->isEHPad()) { + MachineInstr *Try = EHPadToTry[Cont]; + MachineInstr *EndTry = BeginToEnd[Try]; + Cont = EndTry->getParent(); + } + bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond); // This condition means either // 1. This BB ends with a single unconditional branch whose destinaion is diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index 3079d9e15b82f2..209aaea2aaf69b 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -935,7 +935,18 @@ last: ; preds = %bb0 } ; Tests if CFGStackify's removeUnnecessaryInstrs() removes unnecessary branches -; correctly. +; correctly. The code is in the form below, where 'br' is unnecessary because +; after running the 'try' body the control flow will fall through to bb2 anyway. + +; bb0: +; try +; ... +; br bb2 <- Not necessary +; bb1 (ehpad): +; catch +; ... +; bb2: <- Continuation BB +; end ; CHECK-LABEL: test17 define void @test17(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { entry: @@ -974,17 +985,91 @@ try.cont: ; preds = %catch.start, %for.e ret void } +; void foo(); +; void test18() { +; try { +; foo(); +; try { +; foo(); +; } catch (...) { +; } +; } catch (...) { +; } +; } +; +; This tests whether the 'br' can be removed in code in the form as follows. +; Here 'br' is inside an inner try, whose 'end' is in another EH pad. 
In this +; case, after running an inner try body, the control flow should fall through to +; bb3, so the 'br' in the code is unnecessary. + +; bb0: +; try +; try +; ... +; br bb3 <- Not necessary +; bb1: +; catch +; bb2: +; end_try +; catch +; ... +; bb3: <- Continuation BB +; end +; +; CHECK-LABEL: test18 +define void @test18() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK: call foo +entry: + invoke void @foo() + to label %invoke.cont unwind label %catch.dispatch3 + +; CHECK: call foo +; CHECK-NOT: br +invoke.cont: ; preds = %entry + invoke void @foo() + to label %try.cont8 unwind label %catch.dispatch + +catch.dispatch: ; preds = %invoke.cont + %0 = catchswitch within none [label %catch.start] unwind label %catch.dispatch3 + +; CHECK: catch +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* null] + %2 = call i8* @llvm.wasm.get.exception(token %1) + %3 = call i32 @llvm.wasm.get.ehselector(token %1) + %4 = call i8* @__cxa_begin_catch(i8* %2) #2 [ "funclet"(token %1) ] + invoke void @__cxa_end_catch() [ "funclet"(token %1) ] + to label %invoke.cont2 unwind label %catch.dispatch3 + +catch.dispatch3: ; preds = %catch.start, %catch.dispatch, %entry + %5 = catchswitch within none [label %catch.start4] unwind to caller + +catch.start4: ; preds = %catch.dispatch3 + %6 = catchpad within %5 [i8* null] + %7 = call i8* @llvm.wasm.get.exception(token %6) + %8 = call i32 @llvm.wasm.get.ehselector(token %6) + %9 = call i8* @__cxa_begin_catch(i8* %7) #2 [ "funclet"(token %6) ] + call void @__cxa_end_catch() [ "funclet"(token %6) ] + catchret from %6 to label %try.cont8 + +try.cont8: ; preds = %invoke.cont, %invoke.cont2, %catch.start4 + ret void + +invoke.cont2: ; preds = %catch.start + catchret from %1 to label %try.cont8 +} + ; Here an exception is semantically contained in a loop. 
'ehcleanup' BB belongs ; to the exception, but does not belong to the loop (because it does not have a ; path back to the loop header), and is placed after the loop latch block ; 'invoke.cont' intentionally. This tests if 'end_loop' marker is placed ; correctly not right after 'invoke.cont' part but after 'ehcleanup' part, -; NOSORT-LABEL: test18 +; NOSORT-LABEL: test19 ; NOSORT: loop ; NOSORT: try ; NOSORT: end_try ; NOSORT: end_loop -define void @test18(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +define void @test19(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { entry: br label %while.cond @@ -1029,14 +1114,14 @@ while.end: ; preds = %while.body, %while. ; before its corresponding `catch`, because both `try` and `catch` body should ; satisfy the return type requirements. -; NOSORT-LABEL: test19 +; NOSORT-LABEL: test20 ; NOSORT: try i32 ; NOSORT: loop i32 ; NOSORT: end_loop ; NOSORT: catch ; NOSORT: end_try ; NOSORT-NEXT: end_function -define i32 @test19(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +define i32 @test20(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { entry: %t = alloca %class.Object, align 1 br label %for.cond From 9ec72cfc61ad5d87bb9e719b8b01f56e4da88a5b Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 29 Dec 2020 16:03:37 +0300 Subject: [PATCH 06/86] [llvm-readef/obj] - Change the design structure of ELF dumper. NFCI. This is a refactoring for design of stuff in `ELFDumper.cpp`. The current design of ELF dumper is far from ideal. Currently most overridden functions (inherited from `ObjDumper`) in `ELFDumper` just forward to the functions of `ELFDumperStyle` (which can be either `GNUStyle` or `LLVMStyle`). A concrete implementation may be in any of `ELFDumper`/`DumperStyle`/`GNUStyle`/`LLVMStyle`. 
This patch reorganizes the classes by introducing `GNUStyleELFDumper`/`LLVMStyleELFDumper` which inherit from `ELFDumper`. The implementations are moved: `DumperStyle` -> `ELFDumper` `GNUStyle` -> `GNUStyleELFDumper` `LLVMStyle` -> `LLVMStyleELFDumper` With that we can avoid having a lot of redirection calls and helper methods. The number of code lines changes from 7142 to 6922 (reduced by ~3%) and the code overall looks cleaner. Differential revision: https://reviews.llvm.org/D93900 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 1022 ++++++++++--------------- 1 file changed, 399 insertions(+), 623 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index ca8f8472849658..a09ee6d630d783 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -82,8 +82,6 @@ using namespace ELF; namespace { -template class DumpStyle; - template struct RelSymbol { RelSymbol(const typename ELFT::Sym *S, StringRef N) : Sym(S), Name(N.str()) {} @@ -232,50 +230,120 @@ template class Relocation { Optional Addend; }; +template class MipsGOTParser; + template class ELFDumper : public ObjDumper { + LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) + public: ELFDumper(const object::ELFObjectFile &ObjF, ScopedPrinter &Writer); - void printFileHeaders() override; - void printSectionHeaders() override; - void printRelocations() override; - void printDependentLibs() override; - void printDynamicRelocations() override; - void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override; - void printHashSymbols() override; - void printSectionDetails() override; void printUnwindInfo() override; - - void printDynamicTable() override; void printNeededLibraries() override; - void printProgramHeaders(bool PrintProgramHeaders, - cl::boolOrDefault PrintSectionMapping) override; void printHashTable() override; void printGnuHashTable() override; void printLoadName() override; void printVersionInfo() override; - void 
printGroupSections() override; - void printArchSpecificInfo() override; - void printStackMap() const override; - void printHashHistograms() override; + const object::ELFObjectFile &getElfObject() const { return ObjF; }; - void printCGProfile() override; - void printAddrsig() override; + std::string describe(const Elf_Shdr &Sec) const; - void printNotes() override; + unsigned getHashTableEntSize() const { + // EM_S390 and ELF::EM_ALPHA platforms use 8-bytes entries in SHT_HASH + // sections. This violates the ELF specification. + if (Obj.getHeader().e_machine == ELF::EM_S390 || + Obj.getHeader().e_machine == ELF::EM_ALPHA) + return 8; + return 4; + } - void printELFLinkerOptions() override; - void printStackSizes() override; + Elf_Dyn_Range dynamic_table() const { + // A valid .dynamic section contains an array of entries terminated + // with a DT_NULL entry. However, sometimes the section content may + // continue past the DT_NULL entry, so to dump the section correctly, + // we first find the end of the entries by iterating over them. 
+ Elf_Dyn_Range Table = DynamicTable.getAsArrayRef(); - const object::ELFObjectFile &getElfObject() const { return ObjF; }; + size_t Size = 0; + while (Size < Table.size()) + if (Table[Size++].getTag() == DT_NULL) + break; -private: - std::unique_ptr> ELFDumperStyle; + return Table.slice(0, Size); + } - LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) + Elf_Sym_Range dynamic_symbols() const { + if (!DynSymRegion) + return Elf_Sym_Range(); + return DynSymRegion->getAsArrayRef(); + } + + const Elf_Shdr *findSectionByName(StringRef Name) const; + + StringRef getDynamicStringTable() const { return DynamicStringTable; } + +protected: + virtual void printVersionSymbolSection(const Elf_Shdr *Sec) = 0; + virtual void printVersionDefinitionSection(const Elf_Shdr *Sec) = 0; + virtual void printVersionDependencySection(const Elf_Shdr *Sec) = 0; + + void + printDependentLibsHelper(function_ref OnSectionStart, + function_ref OnLibEntry); + + virtual void printRelRelaReloc(const Relocation &R, + const RelSymbol &RelSym) = 0; + virtual void printRelrReloc(const Elf_Relr &R) = 0; + virtual void printDynamicRelocHeader(unsigned Type, StringRef Name, + const DynRegionInfo &Reg) {} + void printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab); + void printDynamicReloc(const Relocation &R); + void printDynamicRelocationsHelper(); + void printRelocationsHelper(const Elf_Shdr &Sec); + void forEachRelocationDo( + const Elf_Shdr &Sec, bool RawRelr, + llvm::function_ref &, unsigned, + const Elf_Shdr &, const Elf_Shdr *)> + RelRelaFn, + llvm::function_ref RelrFn); + + virtual void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, + bool NonVisibilityBitsUsed) const {}; + virtual void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, + Optional StrTable, bool IsDynamic, + bool NonVisibilityBitsUsed) const = 0; + + virtual void printMipsABIFlags() = 0; + virtual void printMipsGOT(const MipsGOTParser &Parser) = 0; + virtual void printMipsPLT(const 
MipsGOTParser &Parser) = 0; + + Expected> getVersionTable(const Elf_Shdr &Sec, + ArrayRef *SymTab, + StringRef *StrTab) const; + StringRef getPrintableSectionName(const Elf_Shdr &Sec) const; + + std::vector getGroups(); + + bool printFunctionStackSize(uint64_t SymValue, + Optional FunctionSec, + const Elf_Shdr &StackSizeSec, DataExtractor Data, + uint64_t *Offset); + void printStackSize(const Relocation &R, const Elf_Shdr &RelocSec, + unsigned Ndx, const Elf_Shdr *SymTab, + const Elf_Shdr *FunctionSec, const Elf_Shdr &StackSizeSec, + const RelocationResolver &Resolver, DataExtractor Data); + virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0; + + void printRelocatableStackSizes(std::function PrintHeader); + void printNonRelocatableStackSizes(std::function PrintHeader); + + const object::ELFObjectFile &ObjF; + const ELFFile &Obj; + StringRef FileName; Expected createDRI(uint64_t Offset, uint64_t Size, uint64_t EntSize) { @@ -299,8 +367,6 @@ template class ELFDumper : public ObjDumper { bool &IsDefault) const; Error LoadVersionMap() const; - const object::ELFObjectFile &ObjF; - const ELFFile &Obj; DynRegionInfo DynRelRegion; DynRegionInfo DynRelaRegion; DynRegionInfo DynRelrRegion; @@ -327,44 +393,6 @@ template class ELFDumper : public ObjDumper { }; mutable SmallVector, 16> VersionMap; - std::string describe(const Elf_Shdr &Sec) const; - -public: - unsigned getHashTableEntSize() const { - // EM_S390 and ELF::EM_ALPHA platforms use 8-bytes entries in SHT_HASH - // sections. This violates the ELF specification. - if (Obj.getHeader().e_machine == ELF::EM_S390 || - Obj.getHeader().e_machine == ELF::EM_ALPHA) - return 8; - return 4; - } - - Elf_Dyn_Range dynamic_table() const { - // A valid .dynamic section contains an array of entries terminated - // with a DT_NULL entry. However, sometimes the section content may - // continue past the DT_NULL entry, so to dump the section correctly, - // we first find the end of the entries by iterating over them. 
- Elf_Dyn_Range Table = DynamicTable.getAsArrayRef(); - - size_t Size = 0; - while (Size < Table.size()) - if (Table[Size++].getTag() == DT_NULL) - break; - - return Table.slice(0, Size); - } - - Optional getDynSymRegion() const { return DynSymRegion; } - - Elf_Sym_Range dynamic_symbols() const { - if (!DynSymRegion) - return Elf_Sym_Range(); - return DynSymRegion->getAsArrayRef(); - } - - Elf_Rel_Range dyn_rels() const; - Elf_Rela_Range dyn_relas() const; - Elf_Relr_Range dyn_relrs() const; std::string getFullSymbolName(const Elf_Sym &Symbol, unsigned SymIndex, Optional StrTable, bool IsDynamic) const; @@ -380,24 +408,6 @@ template class ELFDumper : public ObjDumper { void printSymbolsHelper(bool IsDynamic) const; std::string getDynamicEntry(uint64_t Type, uint64_t Value) const; - const Elf_Shdr *findSectionByName(StringRef Name) const; - - const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; } - const Elf_Shdr *getDotCGProfileSec() const { return DotCGProfileSec; } - const Elf_Shdr *getDotAddrsigSec() const { return DotAddrsigSec; } - ArrayRef getShndxTable() const { return ShndxTable; } - StringRef getDynamicStringTable() const { return DynamicStringTable; } - const DynRegionInfo &getDynRelRegion() const { return DynRelRegion; } - const DynRegionInfo &getDynRelaRegion() const { return DynRelaRegion; } - const DynRegionInfo &getDynRelrRegion() const { return DynRelrRegion; } - const DynRegionInfo &getDynPLTRelRegion() const { return DynPLTRelRegion; } - const DynRegionInfo &getDynamicTableRegion() const { return DynamicTable; } - const Elf_Hash *getHashTable() const { return HashTable; } - const Elf_GnuHash *getGnuHashTable() const { return GnuHashTable; } - - Expected> getVersionTable(const Elf_Shdr &Sec, - ArrayRef *SymTab, - StringRef *StrTab) const; Expected> getVersionDefinitions(const Elf_Shdr &Sec) const; Expected> @@ -710,111 +720,21 @@ void ELFDumper::printSymbolsHelper(bool IsDynamic) const { bool NonVisibilityBitsUsed = llvm::any_of(Syms, 
[](const Elf_Sym &S) { return S.st_other & ~0x3; }); - ELFDumperStyle->printSymtabMessage(SymtabSec, Entries, NonVisibilityBitsUsed); + printSymtabMessage(SymtabSec, Entries, NonVisibilityBitsUsed); for (const Elf_Sym &Sym : Syms) - ELFDumperStyle->printSymbol(Sym, &Sym - Syms.begin(), StrTable, IsDynamic, - NonVisibilityBitsUsed); + printSymbol(Sym, &Sym - Syms.begin(), StrTable, IsDynamic, + NonVisibilityBitsUsed); } -template class MipsGOTParser; - -template class DumpStyle { -public: - LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - - DumpStyle(const ELFDumper &Dumper) - : Obj(Dumper.getElfObject().getELFFile()), ElfObj(Dumper.getElfObject()), - Dumper(Dumper) { - FileName = ElfObj.getFileName(); - } - - virtual ~DumpStyle() = default; - - virtual void printFileHeaders() = 0; - virtual void printGroupSections() = 0; - virtual void printRelocations() = 0; - virtual void printSectionHeaders() = 0; - virtual void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) = 0; - virtual void printHashSymbols() {} - virtual void printSectionDetails() {} - virtual void printDependentLibs() = 0; - virtual void printDynamic() {} - virtual void printDynamicRelocations() = 0; - virtual void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, - bool NonVisibilityBitsUsed) {} - virtual void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, - Optional StrTable, bool IsDynamic, - bool NonVisibilityBitsUsed) = 0; - virtual void printProgramHeaders(bool PrintProgramHeaders, - cl::boolOrDefault PrintSectionMapping) = 0; - virtual void printVersionSymbolSection(const Elf_Shdr *Sec) = 0; - virtual void printVersionDefinitionSection(const Elf_Shdr *Sec) = 0; - virtual void printVersionDependencySection(const Elf_Shdr *Sec) = 0; - virtual void printHashHistograms() = 0; - virtual void printCGProfile() = 0; - virtual void printAddrsig() = 0; - virtual void printNotes() = 0; - virtual void printELFLinkerOptions() = 0; - virtual void printStackSizes() = 0; - void 
printNonRelocatableStackSizes(std::function PrintHeader); - void printRelocatableStackSizes(std::function PrintHeader); - bool printFunctionStackSize(uint64_t SymValue, - Optional FunctionSec, - const Elf_Shdr &StackSizeSec, DataExtractor Data, - uint64_t *Offset); - void printStackSize(const Relocation &R, const Elf_Shdr &RelocSec, - unsigned Ndx, const Elf_Shdr *SymTab, - const Elf_Shdr *FunctionSec, const Elf_Shdr &StackSizeSec, - const RelocationResolver &Resolver, DataExtractor Data); - virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0; - virtual void printMipsGOT(const MipsGOTParser &Parser) = 0; - virtual void printMipsPLT(const MipsGOTParser &Parser) = 0; - virtual void printMipsABIFlags() = 0; - const ELFDumper &dumper() const { return Dumper; } - void reportUniqueWarning(Error Err) const; - void reportUniqueWarning(const Twine &Msg) const; - -protected: - std::vector getGroups(); - - void printDependentLibsHelper( - function_ref OnSectionStart, - function_ref OnSectionEntry); - - virtual void printReloc(const Relocation &R, unsigned RelIndex, - const Elf_Shdr &Sec, const Elf_Shdr *SymTab) = 0; - virtual void printRelrReloc(const Elf_Relr &R) = 0; - virtual void printDynamicReloc(const Relocation &R) = 0; - void forEachRelocationDo( - const Elf_Shdr &Sec, bool RawRelr, - llvm::function_ref &, unsigned, - const Elf_Shdr &, const Elf_Shdr *)> - RelRelaFn, - llvm::function_ref RelrFn); - void printRelocationsHelper(const Elf_Shdr &Sec); - void printDynamicRelocationsHelper(); - virtual void printDynamicRelocHeader(unsigned Type, StringRef Name, - const DynRegionInfo &Reg){}; - - StringRef getPrintableSectionName(const Elf_Shdr &Sec) const; - - StringRef FileName; - const ELFFile &Obj; - const ELFObjectFile &ElfObj; - -private: - const ELFDumper &Dumper; -}; - -template class GNUStyle : public DumpStyle { +template class GNUELFDumper : public ELFDumper { formatted_raw_ostream &OS; public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - 
GNUStyle(ScopedPrinter &W, const ELFDumper &Dumper) - : DumpStyle(Dumper), - OS(static_cast(W.getOStream())) { + GNUELFDumper(const object::ELFObjectFile &ObjF, ScopedPrinter &Writer) + : ELFDumper(ObjF, Writer), + OS(static_cast(Writer.getOStream())) { assert(&W.getOStream() == &llvm::fouts()); } @@ -826,10 +746,10 @@ template class GNUStyle : public DumpStyle { void printHashSymbols() override; void printSectionDetails() override; void printDependentLibs() override; - void printDynamic() override; + void printDynamicTable() override; void printDynamicRelocations() override; void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, - bool NonVisibilityBitsUsed) override; + bool NonVisibilityBitsUsed) const override; void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; void printVersionSymbolSection(const Elf_Shdr *Sec) override; @@ -841,15 +761,10 @@ template class GNUStyle : public DumpStyle { void printNotes() override; void printELFLinkerOptions() override; void printStackSizes() override; - void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; - void printMipsGOT(const MipsGOTParser &Parser) override; - void printMipsPLT(const MipsGOTParser &Parser) override; - void printMipsABIFlags() override; private: void printHashHistogram(const Elf_Hash &HashTable); void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable); - void printHashTableSymbols(const Elf_Hash &HashTable); void printGnuHashTableSymbols(const Elf_GnuHash &GnuHashTable); @@ -862,7 +777,7 @@ template class GNUStyle : public DumpStyle { }; template - std::string printEnum(T Value, ArrayRef> EnumValues) { + std::string printEnum(T Value, ArrayRef> EnumValues) const { for (const EnumEntry &EnumItem : EnumValues) if (EnumItem.Value == Value) return std::string(EnumItem.AltName); @@ -872,7 +787,7 @@ template class GNUStyle : public DumpStyle { template std::string printFlags(T Value, ArrayRef> EnumValues, TEnum EnumMask1 = {}, TEnum 
EnumMask2 = {}, - TEnum EnumMask3 = {}) { + TEnum EnumMask3 = {}) const { std::string Str; for (const EnumEntry &Flag : EnumValues) { if (Flag.Value == 0) @@ -896,7 +811,7 @@ template class GNUStyle : public DumpStyle { return Str; } - formatted_raw_ostream &printField(struct Field F) { + formatted_raw_ostream &printField(struct Field F) const { if (F.Column != 0) OS.PadToColumn(F.Column); OS << F.Str; @@ -905,42 +820,35 @@ template class GNUStyle : public DumpStyle { } void printHashedSymbol(const Elf_Sym *Sym, unsigned SymIndex, StringRef StrTable, uint32_t Bucket); - void printReloc(const Relocation &R, unsigned RelIndex, - const Elf_Shdr &Sec, const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) override; - void printRelRelaReloc(const Relocation &R, - const RelSymbol &RelSym); + const RelSymbol &RelSym) override; void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, Optional StrTable, bool IsDynamic, - bool NonVisibilityBitsUsed) override; + bool NonVisibilityBitsUsed) const override; void printDynamicRelocHeader(unsigned Type, StringRef Name, const DynRegionInfo &Reg) override; - void printDynamicReloc(const Relocation &R) override; - std::string getSymbolSectionNdx(const Elf_Sym &Symbol, unsigned SymIndex); + std::string getSymbolSectionNdx(const Elf_Sym &Symbol, + unsigned SymIndex) const; void printProgramHeaders(); void printSectionMapping(); void printGNUVersionSectionProlog(const typename ELFT::Shdr &Sec, const Twine &Label, unsigned EntriesNum); -}; -template -void DumpStyle::reportUniqueWarning(Error Err) const { - this->dumper().reportUniqueWarning(std::move(Err)); -} + void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; -template -void DumpStyle::reportUniqueWarning(const Twine &Msg) const { - this->dumper().reportUniqueWarning(Msg); -} + void printMipsGOT(const MipsGOTParser &Parser) override; + void printMipsPLT(const MipsGOTParser &Parser) override; + void printMipsABIFlags() override; +}; -template 
class LLVMStyle : public DumpStyle { +template class LLVMELFDumper : public ELFDumper { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - LLVMStyle(ScopedPrinter &W, const ELFDumper &Dumper) - : DumpStyle(Dumper), W(W) {} + LLVMELFDumper(const object::ELFObjectFile &ObjF, ScopedPrinter &Writer) + : ELFDumper(ObjF, Writer), W(Writer) {} void printFileHeaders() override; void printGroupSections() override; @@ -948,7 +856,7 @@ template class LLVMStyle : public DumpStyle { void printSectionHeaders() override; void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override; void printDependentLibs() override; - void printDynamic() override; + void printDynamicTable() override; void printDynamicRelocations() override; void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; @@ -961,26 +869,23 @@ template class LLVMStyle : public DumpStyle { void printNotes() override; void printELFLinkerOptions() override; void printStackSizes() override; - void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; - void printMipsGOT(const MipsGOTParser &Parser) override; - void printMipsPLT(const MipsGOTParser &Parser) override; - void printMipsABIFlags() override; private: - void printReloc(const Relocation &R, unsigned RelIndex, - const Elf_Shdr &Sec, const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) override; - void printDynamicReloc(const Relocation &R) override; + void printRelRelaReloc(const Relocation &R, + const RelSymbol &RelSym) override; - void printRelRelaReloc(const Relocation &R, StringRef SymbolName); - void printSymbols(); - void printDynamicSymbols(); - void printSymbolSection(const Elf_Sym &Symbol, unsigned SymIndex); + void printSymbolSection(const Elf_Sym &Symbol, unsigned SymIndex) const; void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, Optional StrTable, bool IsDynamic, - bool /*NonVisibilityBitsUsed*/) override; + bool /*NonVisibilityBitsUsed*/) const override; void 
printProgramHeaders(); void printSectionMapping() {} + void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; + + void printMipsGOT(const MipsGOTParser &Parser) override; + void printMipsPLT(const MipsGOTParser &Parser) override; + void printMipsABIFlags() override; ScopedPrinter &W; }; @@ -990,9 +895,11 @@ template class LLVMStyle : public DumpStyle { namespace llvm { template -static std::unique_ptr createELFDumper(const ELFObjectFile &Obj, - ScopedPrinter &Writer) { - return std::make_unique>(Obj, Writer); +static std::unique_ptr +createELFDumper(const ELFObjectFile &Obj, ScopedPrinter &Writer) { + if (opts::Output == opts::GNU) + return std::make_unique>(Obj, Writer); + return std::make_unique>(Obj, Writer); } std::unique_ptr createELFDumper(const object::ELFObjectFileBase &Obj, @@ -1076,7 +983,7 @@ Expected ELFDumper::getSymbolVersion(const Elf_Sym &Sym, // Get the corresponding version index entry. if (Expected EntryOrErr = Obj.template getEntry(*SymbolVersionSection, EntryIndex)) - return this->getSymbolVersionByIndex((*EntryOrErr)->vs_index, IsDefault); + return getSymbolVersionByIndex((*EntryOrErr)->vs_index, IsDefault); else return EntryOrErr.takeError(); } @@ -1116,8 +1023,8 @@ static std::string maybeDemangle(StringRef Name) { template std::string ELFDumper::getStaticSymbolName(uint32_t Index) const { auto Warn = [&](Error E) -> std::string { - this->reportUniqueWarning("unable to read the name of symbol with index " + - Twine(Index) + ": " + toString(std::move(E))); + reportUniqueWarning("unable to read the name of symbol with index " + + Twine(Index) + ": " + toString(std::move(E))); return ""; }; @@ -1860,7 +1767,7 @@ ELFDumper::findDynamic() { break; } } else { - this->reportUniqueWarning( + reportUniqueWarning( "unable to read program headers to locate the PT_DYNAMIC segment: " + toString(PhdrsOrErr.takeError())); } @@ -1991,13 +1898,9 @@ template ELFDumper::ELFDumper(const object::ELFObjectFile &O, ScopedPrinter &Writer) : 
ObjDumper(Writer, O.getFileName()), ObjF(O), Obj(O.getELFFile()), - DynRelRegion(O, *this), DynRelaRegion(O, *this), DynRelrRegion(O, *this), + FileName(O.getFileName()), DynRelRegion(O, *this), + DynRelaRegion(O, *this), DynRelrRegion(O, *this), DynPLTRelRegion(O, *this), DynamicTable(O, *this) { - if (opts::Output == opts::GNU) - ELFDumperStyle.reset(new GNUStyle(Writer, *this)); - else - ELFDumperStyle.reset(new LLVMStyle(Writer, *this)); - if (!O.IsContentValid()) return; @@ -2252,90 +2155,15 @@ template void ELFDumper::parseDynamicTable() { } } -template -typename ELFDumper::Elf_Rel_Range ELFDumper::dyn_rels() const { - return DynRelRegion.getAsArrayRef(); -} - -template -typename ELFDumper::Elf_Rela_Range ELFDumper::dyn_relas() const { - return DynRelaRegion.getAsArrayRef(); -} - -template -typename ELFDumper::Elf_Relr_Range ELFDumper::dyn_relrs() const { - return DynRelrRegion.getAsArrayRef(); -} - -template void ELFDumper::printFileHeaders() { - ELFDumperStyle->printFileHeaders(); -} - -template void ELFDumper::printSectionHeaders() { - ELFDumperStyle->printSectionHeaders(); -} - -template void ELFDumper::printRelocations() { - ELFDumperStyle->printRelocations(); -} - -template -void ELFDumper::printProgramHeaders( - bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { - ELFDumperStyle->printProgramHeaders(PrintProgramHeaders, PrintSectionMapping); -} - template void ELFDumper::printVersionInfo() { // Dump version symbol section. - ELFDumperStyle->printVersionSymbolSection(SymbolVersionSection); + printVersionSymbolSection(SymbolVersionSection); // Dump version definition section. - ELFDumperStyle->printVersionDefinitionSection(SymbolVersionDefSection); + printVersionDefinitionSection(SymbolVersionDefSection); // Dump version dependency section. 
- ELFDumperStyle->printVersionDependencySection(SymbolVersionNeedSection); -} - -template void ELFDumper::printDependentLibs() { - ELFDumperStyle->printDependentLibs(); -} - -template void ELFDumper::printDynamicRelocations() { - ELFDumperStyle->printDynamicRelocations(); -} - -template -void ELFDumper::printSymbols(bool PrintSymbols, - bool PrintDynamicSymbols) { - ELFDumperStyle->printSymbols(PrintSymbols, PrintDynamicSymbols); -} - -template void ELFDumper::printHashSymbols() { - ELFDumperStyle->printHashSymbols(); -} - -template void ELFDumper::printSectionDetails() { - ELFDumperStyle->printSectionDetails(); -} - -template void ELFDumper::printHashHistograms() { - ELFDumperStyle->printHashHistograms(); -} - -template void ELFDumper::printCGProfile() { - ELFDumperStyle->printCGProfile(); -} - -template void ELFDumper::printNotes() { - ELFDumperStyle->printNotes(); -} - -template void ELFDumper::printELFLinkerOptions() { - ELFDumperStyle->printELFLinkerOptions(); -} - -template void ELFDumper::printStackSizes() { - ELFDumperStyle->printStackSizes(); + printVersionDependencySection(SymbolVersionNeedSection); } #define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \ @@ -2641,8 +2469,6 @@ template void ELFDumper::printUnwindInfo() { Ctx.printUnwindInformation(); } -namespace { - template <> void ELFDumper::printUnwindInfo() { if (Obj.getHeader().e_machine == EM_ARM) { ARM::EHABI::PrinterContext Ctx(W, Obj, ObjF.getFileName(), @@ -2653,12 +2479,6 @@ template <> void ELFDumper::printUnwindInfo() { Ctx.printUnwindInformation(); } -} // end anonymous namespace - -template void ELFDumper::printDynamicTable() { - ELFDumperStyle->printDynamic(); -} - template void ELFDumper::printNeededLibraries() { ListScope D(W, "NeededLibraries"); @@ -2842,19 +2662,19 @@ template void ELFDumper::printArchSpecificInfo() { printAttributes(); break; case EM_MIPS: { - ELFDumperStyle->printMipsABIFlags(); + printMipsABIFlags(); printMipsOptions(); printMipsReginfo(); MipsGOTParser Parser(*this); if 
(Error E = Parser.findGOT(dynamic_table(), dynamic_symbols())) reportUniqueWarning(std::move(E)); else if (!Parser.isGotEmpty()) - ELFDumperStyle->printMipsGOT(Parser); + printMipsGOT(Parser); if (Error E = Parser.findPLT(dynamic_table())) reportUniqueWarning(std::move(E)); else if (!Parser.isPltEmpty()) - ELFDumperStyle->printMipsPLT(Parser); + printMipsPLT(Parser); break; } default: @@ -3427,12 +3247,16 @@ template void ELFDumper::printStackMap() const { prettyPrintStackMap(W, StackMapParser(*ContentOrErr)); } -template void ELFDumper::printGroupSections() { - ELFDumperStyle->printGroupSections(); -} - -template void ELFDumper::printAddrsig() { - ELFDumperStyle->printAddrsig(); +template +void ELFDumper::printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { + Expected> Target = getRelocationTarget(R, SymTab); + if (!Target) + reportUniqueWarning("unable to print relocation " + Twine(RelIndex) + + " in " + describe(Sec) + ": " + + toString(Target.takeError())); + else + printRelRelaReloc(R, *Target); } static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, @@ -3494,7 +3318,7 @@ static const EnumEntry *getObjectFileEnumEntry(unsigned Type) { return nullptr; } -template void GNUStyle::printFileHeaders() { +template void GNUELFDumper::printFileHeaders() { const Elf_Ehdr &e = this->Obj.getHeader(); OS << "ELF Header:\n"; OS << " Magic: "; @@ -3566,13 +3390,13 @@ template void GNUStyle::printFileHeaders() { printFields(OS, "Section header string table index:", Str); } -template std::vector DumpStyle::getGroups() { +template std::vector ELFDumper::getGroups() { auto GetSignature = [&](const Elf_Sym &Sym, unsigned SymNdx, const Elf_Shdr &Symtab) -> StringRef { Expected StrTableOrErr = Obj.getStringTableForSymtab(Symtab); if (!StrTableOrErr) { reportUniqueWarning("unable to get the string table for " + - describe(Obj, Symtab) + ": " + + describe(Symtab) + ": " + toString(StrTableOrErr.takeError())); return 
""; } @@ -3604,11 +3428,11 @@ template std::vector DumpStyle::getGroups() { Signature = GetSignature(**SymOrErr, Sec.sh_info, **SymtabOrErr); else reportUniqueWarning("unable to get the signature symbol for " + - describe(Obj, Sec) + ": " + + describe(Sec) + ": " + toString(SymOrErr.takeError())); } else { reportUniqueWarning("unable to get the symbol table for " + - describe(Obj, Sec) + ": " + + describe(Sec) + ": " + toString(SymtabOrErr.takeError())); } @@ -3617,13 +3441,12 @@ template std::vector DumpStyle::getGroups() { Obj.template getSectionContentsAsArray(Sec)) { if (ContentsOrErr->empty()) reportUniqueWarning("unable to read the section group flag from the " + - describe(Obj, Sec) + ": the section is empty"); + describe(Sec) + ": the section is empty"); else Data = *ContentsOrErr; } else { - reportUniqueWarning("unable to get the content of the " + - describe(Obj, Sec) + ": " + - toString(ContentsOrErr.takeError())); + reportUniqueWarning("unable to get the content of the " + describe(Sec) + + ": " + toString(ContentsOrErr.takeError())); } Ret.push_back({getPrintableSectionName(Sec), @@ -3644,9 +3467,8 @@ template std::vector DumpStyle::getGroups() { GM.push_back({getPrintableSectionName(**SecOrErr), Ndx}); } else { reportUniqueWarning("unable to get the section with index " + - Twine(Ndx) + " when dumping the " + - describe(Obj, Sec) + ": " + - toString(SecOrErr.takeError())); + Twine(Ndx) + " when dumping the " + describe(Sec) + + ": " + toString(SecOrErr.takeError())); GM.push_back({"", Ndx}); } } @@ -3663,7 +3485,7 @@ mapSectionsToGroups(ArrayRef Groups) { return Ret; } -template void GNUStyle::printGroupSections() { +template void GNUELFDumper::printGroupSections() { std::vector V = this->getGroups(); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { @@ -3690,25 +3512,13 @@ template void GNUStyle::printGroupSections() { } template -void GNUStyle::printReloc(const Relocation &R, unsigned RelIndex, - const Elf_Shdr &Sec, const 
Elf_Shdr *SymTab) { - Expected> Target = - this->dumper().getRelocationTarget(R, SymTab); - if (!Target) - this->reportUniqueWarning("unable to print relocation " + Twine(RelIndex) + - " in " + describe(this->Obj, Sec) + ": " + - toString(Target.takeError())); - else - printRelRelaReloc(R, *Target); -} - -template void GNUStyle::printRelrReloc(const Elf_Relr &R) { +void GNUELFDumper::printRelrReloc(const Elf_Relr &R) { OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8)) << "\n"; } template -void GNUStyle::printRelRelaReloc(const Relocation &R, - const RelSymbol &RelSym) { +void GNUELFDumper::printRelRelaReloc(const Relocation &R, + const RelSymbol &RelSym) { // First two fields are bit width dependent. The rest of them are fixed width. unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias}; @@ -3768,8 +3578,8 @@ static void printRelocHeaderFields(formatted_raw_ostream &OS, unsigned SType) { } template -void GNUStyle::printDynamicRelocHeader(unsigned Type, StringRef Name, - const DynRegionInfo &Reg) { +void GNUELFDumper::printDynamicRelocHeader(unsigned Type, StringRef Name, + const DynRegionInfo &Reg) { uint64_t Offset = Reg.Addr - this->Obj.base(); OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x" << to_hexString(Offset, false) << " contains " << Reg.Size << " bytes:\n"; @@ -3784,7 +3594,7 @@ static bool isRelocationSec(const typename ELFT::Shdr &Sec) { Sec.sh_type == ELF::SHT_ANDROID_RELR; } -template void GNUStyle::printRelocations() { +template void GNUELFDumper::printRelocations() { auto GetEntriesNum = [&](const Elf_Shdr &Sec) -> Expected { // Android's packed relocation section needs to be unpacked first // to get the actual number of entries. 
@@ -3819,7 +3629,7 @@ template void GNUStyle::printRelocations() { EntriesNum = std::to_string(*NumOrErr); else this->reportUniqueWarning("unable to get the number of relocations in " + - describe(this->Obj, Sec) + ": " + + this->describe(Sec) + ": " + toString(NumOrErr.takeError())); uintX_t Offset = Sec.sh_offset; @@ -3886,7 +3696,7 @@ static void printSectionDescription(formatted_raw_ostream &OS, OS << "p (processor specific)\n"; } -template void GNUStyle::printSectionHeaders() { +template void GNUELFDumper::printSectionHeaders() { unsigned Bias = ELFT::Is64Bits ? 0 : 8; ArrayRef Sections = cantFail(this->Obj.sections()); OS << "There are " << to_string(Sections.size()) @@ -3903,8 +3713,8 @@ template void GNUStyle::printSectionHeaders() { OS << "\n"; StringRef SecStrTable; - if (Expected SecStrTableOrErr = this->Obj.getSectionStringTable( - Sections, this->dumper().WarningHandler)) + if (Expected SecStrTableOrErr = + this->Obj.getSectionStringTable(Sections, this->WarningHandler)) SecStrTable = *SecStrTableOrErr; else this->reportUniqueWarning(SecStrTableOrErr.takeError()); @@ -3948,8 +3758,9 @@ template void GNUStyle::printSectionHeaders() { } template -void GNUStyle::printSymtabMessage(const Elf_Shdr *Symtab, size_t Entries, - bool NonVisibilityBitsUsed) { +void GNUELFDumper::printSymtabMessage(const Elf_Shdr *Symtab, + size_t Entries, + bool NonVisibilityBitsUsed) const { StringRef Name; if (Symtab) Name = this->getPrintableSectionName(*Symtab); @@ -3970,8 +3781,8 @@ void GNUStyle::printSymtabMessage(const Elf_Shdr *Symtab, size_t Entries, } template -std::string GNUStyle::getSymbolSectionNdx(const Elf_Sym &Symbol, - unsigned SymIndex) { +std::string GNUELFDumper::getSymbolSectionNdx(const Elf_Sym &Symbol, + unsigned SymIndex) const { unsigned SectionIndex = Symbol.st_shndx; switch (SectionIndex) { case ELF::SHN_UNDEF: @@ -3982,7 +3793,7 @@ std::string GNUStyle::getSymbolSectionNdx(const Elf_Sym &Symbol, return "COM"; case ELF::SHN_XINDEX: { Expected 
IndexOrErr = object::getExtendedSymbolTableIndex( - Symbol, SymIndex, this->dumper().getShndxTable()); + Symbol, SymIndex, this->ShndxTable); if (!IndexOrErr) { assert(Symbol.st_shndx == SHN_XINDEX && "getExtendedSymbolTableIndex should only fail due to an invalid " @@ -4013,9 +3824,10 @@ std::string GNUStyle::getSymbolSectionNdx(const Elf_Sym &Symbol, } template -void GNUStyle::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, - Optional StrTable, bool IsDynamic, - bool NonVisibilityBitsUsed) { +void GNUELFDumper::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, + Optional StrTable, + bool IsDynamic, + bool NonVisibilityBitsUsed) const { unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[8] = {0, 8, 17 + Bias, 23 + Bias, 31 + Bias, 38 + Bias, 48 + Bias, 51 + Bias}; @@ -4056,15 +3868,17 @@ void GNUStyle::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, Fields[6].Str = getSymbolSectionNdx(Symbol, SymIndex); Fields[7].Str = - this->dumper().getFullSymbolName(Symbol, SymIndex, StrTable, IsDynamic); + this->getFullSymbolName(Symbol, SymIndex, StrTable, IsDynamic); for (const Field &Entry : Fields) printField(Entry); OS << "\n"; } template -void GNUStyle::printHashedSymbol(const Elf_Sym *Symbol, unsigned SymIndex, - StringRef StrTable, uint32_t Bucket) { +void GNUELFDumper::printHashedSymbol(const Elf_Sym *Symbol, + unsigned SymIndex, + StringRef StrTable, + uint32_t Bucket) { unsigned Bias = ELFT::Is64Bits ? 
8 : 0; Field Fields[9] = {0, 6, 11, 20 + Bias, 25 + Bias, 34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias}; @@ -4087,8 +3901,7 @@ void GNUStyle::printHashedSymbol(const Elf_Sym *Symbol, unsigned SymIndex, Fields[6].Str = printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities)); Fields[7].Str = getSymbolSectionNdx(*Symbol, SymIndex); - Fields[8].Str = - this->dumper().getFullSymbolName(*Symbol, SymIndex, StrTable, true); + Fields[8].Str = this->getFullSymbolName(*Symbol, SymIndex, StrTable, true); for (const Field &Entry : Fields) printField(Entry); @@ -4096,19 +3909,19 @@ void GNUStyle::printHashedSymbol(const Elf_Sym *Symbol, unsigned SymIndex, } template -void GNUStyle::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) { +void GNUELFDumper::printSymbols(bool PrintSymbols, + bool PrintDynamicSymbols) { if (!PrintSymbols && !PrintDynamicSymbols) return; // GNU readelf prints both the .dynsym and .symtab with --symbols. - this->dumper().printSymbolsHelper(true); + this->printSymbolsHelper(true); if (PrintSymbols) - this->dumper().printSymbolsHelper(false); + this->printSymbolsHelper(false); } template -void GNUStyle::printHashTableSymbols(const Elf_Hash &SysVHash) { - StringRef StringTable = this->dumper().getDynamicStringTable(); - if (StringTable.empty()) +void GNUELFDumper::printHashTableSymbols(const Elf_Hash &SysVHash) { + if (this->DynamicStringTable.empty()) return; if (ELFT::Is64Bits) @@ -4117,14 +3930,13 @@ void GNUStyle::printHashTableSymbols(const Elf_Hash &SysVHash) { OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; OS << "\n"; - Elf_Sym_Range DynSyms = this->dumper().dynamic_symbols(); + Elf_Sym_Range DynSyms = this->dynamic_symbols(); const Elf_Sym *FirstSym = DynSyms.empty() ? nullptr : &DynSyms[0]; if (!FirstSym) { - Optional DynSymRegion = this->dumper().getDynSymRegion(); this->reportUniqueWarning( Twine("unable to print symbols for the .hash table: the " "dynamic symbol table ") + - (DynSymRegion ? 
"is empty" : "was not found")); + (this->DynSymRegion ? "is empty" : "was not found")); return; } @@ -4145,26 +3957,24 @@ void GNUStyle::printHashTableSymbols(const Elf_Hash &SysVHash) { break; } - printHashedSymbol(FirstSym + Ch, Ch, StringTable, Buc); + printHashedSymbol(FirstSym + Ch, Ch, this->DynamicStringTable, Buc); Visited[Ch] = true; } } } template -void GNUStyle::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) { - StringRef StringTable = this->dumper().getDynamicStringTable(); - if (StringTable.empty()) +void GNUELFDumper::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) { + if (this->DynamicStringTable.empty()) return; - Elf_Sym_Range DynSyms = this->dumper().dynamic_symbols(); + Elf_Sym_Range DynSyms = this->dynamic_symbols(); const Elf_Sym *FirstSym = DynSyms.empty() ? nullptr : &DynSyms[0]; - Optional DynSymRegion = this->dumper().getDynSymRegion(); if (!FirstSym) { this->reportUniqueWarning( Twine("unable to print symbols for the .gnu.hash table: the " "dynamic symbol table ") + - (DynSymRegion ? "is empty" : "was not found")); + (this->DynSymRegion ? 
"is empty" : "was not found")); return; } @@ -4182,7 +3992,7 @@ void GNUStyle::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) { }; Expected> ValuesOrErr = - getGnuHashTableChains(DynSymRegion, &GnuHash); + getGnuHashTableChains(this->DynSymRegion, &GnuHash); ArrayRef Values; if (!ValuesOrErr) this->reportUniqueWarning("unable to get hash values for the SHT_GNU_HASH " @@ -4200,7 +4010,7 @@ void GNUStyle::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) { while (true) { uint32_t SymIndex = Index++; if (const Elf_Sym *Sym = GetSymbol(SymIndex, DynSyms.size())) - printHashedSymbol(Sym, SymIndex, StringTable, Buc); + printHashedSymbol(Sym, SymIndex, this->DynamicStringTable, Buc); else break; @@ -4220,17 +4030,17 @@ void GNUStyle::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) { } } -template void GNUStyle::printHashSymbols() { - if (const Elf_Hash *SysVHash = this->dumper().getHashTable()) { +template void GNUELFDumper::printHashSymbols() { + if (this->HashTable) { OS << "\n Symbol table of .hash for image:\n"; - if (Error E = checkHashTable(this->dumper(), SysVHash)) + if (Error E = checkHashTable(*this, this->HashTable)) this->reportUniqueWarning(std::move(E)); else - printHashTableSymbols(*SysVHash); + printHashTableSymbols(*this->HashTable); } // Try printing the .gnu.hash table. 
- if (const Elf_GnuHash *GnuHash = this->dumper().getGnuHashTable()) { + if (this->GnuHashTable) { OS << "\n Symbol table of .gnu.hash for image:\n"; if (ELFT::Is64Bits) OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; @@ -4238,14 +4048,14 @@ template void GNUStyle::printHashSymbols() { OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; OS << "\n"; - if (Error E = checkGNUHashTable(this->Obj, GnuHash)) + if (Error E = checkGNUHashTable(this->Obj, this->GnuHashTable)) this->reportUniqueWarning(std::move(E)); else - printGnuHashTableSymbols(*GnuHash); + printGnuHashTableSymbols(*this->GnuHashTable); } } -template void GNUStyle::printSectionDetails() { +template void GNUELFDumper::printSectionDetails() { ArrayRef Sections = cantFail(this->Obj.sections()); OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " @@ -4273,8 +4083,8 @@ template void GNUStyle::printSectionDetails() { PrintFields({{"Flags", 7}}); StringRef SecStrTable; - if (Expected SecStrTableOrErr = this->Obj.getSectionStringTable( - Sections, this->dumper().WarningHandler)) + if (Expected SecStrTableOrErr = + this->Obj.getSectionStringTable(Sections, this->WarningHandler)) SecStrTable = *SecStrTableOrErr; else this->reportUniqueWarning(SecStrTableOrErr.takeError()); @@ -4435,7 +4245,7 @@ static bool checkPTDynamic(const typename ELFT::Phdr &Phdr, } template -void GNUStyle::printProgramHeaders( +void GNUELFDumper::printProgramHeaders( bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { if (PrintProgramHeaders) printProgramHeaders(); @@ -4446,7 +4256,7 @@ void GNUStyle::printProgramHeaders( printSectionMapping(); } -template void GNUStyle::printProgramHeaders() { +template void GNUELFDumper::printProgramHeaders() { unsigned Bias = ELFT::Is64Bits ? 
8 : 0; const Elf_Ehdr &Header = this->Obj.getHeader(); Field Fields[8] = {2, 17, 26, 37 + Bias, @@ -4515,7 +4325,7 @@ template void GNUStyle::printProgramHeaders() { } } -template void GNUStyle::printSectionMapping() { +template void GNUELFDumper::printSectionMapping() { OS << "\n Section to Segment mapping:\n Segment Sections...\n"; DenseSet BelongsToSegment; int Phnum = 0; @@ -4611,11 +4421,6 @@ RelSymbol getSymbolForReloc(const ELFDumper &Dumper, } } // namespace -template -void GNUStyle::printDynamicReloc(const Relocation &R) { - printRelRelaReloc(R, getSymbolForReloc(this->dumper(), R)); -} - template static size_t getMaxDynamicTagSize(const ELFFile &Obj, typename ELFT::DynRange Tags) { @@ -4625,14 +4430,13 @@ static size_t getMaxDynamicTagSize(const ELFFile &Obj, return Max; } -template void GNUStyle::printDynamic() { - Elf_Dyn_Range Table = this->dumper().dynamic_table(); +template void GNUELFDumper::printDynamicTable() { + Elf_Dyn_Range Table = this->dynamic_table(); if (Table.empty()) return; OS << "Dynamic section at offset " - << format_hex(reinterpret_cast( - this->dumper().getDynamicTableRegion().Addr) - + << format_hex(reinterpret_cast(this->DynamicTable.Addr) - this->Obj.base(), 1) << " contains " << Table.size() << " entries:\n"; @@ -4650,18 +4454,23 @@ template void GNUStyle::printDynamic() { uintX_t Tag = Entry.getTag(); std::string Type = std::string("(") + this->Obj.getDynamicTagAsString(Tag).c_str() + ")"; - std::string Value = this->dumper().getDynamicEntry(Tag, Entry.getVal()); + std::string Value = this->getDynamicEntry(Tag, Entry.getVal()); OS << " " << format_hex(Tag, ELFT::Is64Bits ? 
18 : 10) << format(ValueFmt.c_str(), Type.c_str()) << Value << "\n"; } } -template void GNUStyle::printDynamicRelocations() { +template void GNUELFDumper::printDynamicRelocations() { this->printDynamicRelocationsHelper(); } template -void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { +void ELFDumper::printDynamicReloc(const Relocation &R) { + printRelRelaReloc(R, getSymbolForReloc(*this, R)); +} + +template +void ELFDumper::printRelocationsHelper(const Elf_Shdr &Sec) { this->forEachRelocationDo( Sec, opts::RawRelr, [&](const Relocation &R, unsigned Ndx, const Elf_Shdr &Sec, @@ -4669,46 +4478,43 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { [&](const Elf_Relr &R) { printRelrReloc(R); }); } -template void DumpStyle::printDynamicRelocationsHelper() { +template void ELFDumper::printDynamicRelocationsHelper() { const bool IsMips64EL = this->Obj.isMips64EL(); - const DynRegionInfo &DynRelaRegion = this->dumper().getDynRelaRegion(); - if (DynRelaRegion.Size > 0) { - printDynamicRelocHeader(ELF::SHT_RELA, "RELA", DynRelaRegion); - for (const Elf_Rela &Rela : this->dumper().dyn_relas()) + if ( this->DynRelaRegion.Size > 0) { + printDynamicRelocHeader(ELF::SHT_RELA, "RELA", this->DynRelaRegion); + for (const Elf_Rela &Rela : this->DynRelaRegion.getAsArrayRef()) printDynamicReloc(Relocation(Rela, IsMips64EL)); } - const DynRegionInfo &DynRelRegion = this->dumper().getDynRelRegion(); - if (DynRelRegion.Size > 0) { - printDynamicRelocHeader(ELF::SHT_REL, "REL", DynRelRegion); - for (const Elf_Rel &Rel : this->dumper().dyn_rels()) + if (this->DynRelRegion.Size > 0) { + printDynamicRelocHeader(ELF::SHT_REL, "REL", this->DynRelRegion); + for (const Elf_Rel &Rel : this->DynRelRegion.getAsArrayRef()) printDynamicReloc(Relocation(Rel, IsMips64EL)); } - const DynRegionInfo &DynRelrRegion = this->dumper().getDynRelrRegion(); - if (DynRelrRegion.Size > 0) { - printDynamicRelocHeader(ELF::SHT_REL, "RELR", DynRelrRegion); - Elf_Relr_Range Relrs = 
this->dumper().dyn_relrs(); + if (this->DynRelrRegion.Size > 0) { + printDynamicRelocHeader(ELF::SHT_REL, "RELR", this->DynRelrRegion); + Elf_Relr_Range Relrs = this->DynRelrRegion.getAsArrayRef(); for (const Elf_Rel &Rel : Obj.decode_relrs(Relrs)) printDynamicReloc(Relocation(Rel, IsMips64EL)); } - const DynRegionInfo &DynPLTRelRegion = this->dumper().getDynPLTRelRegion(); - if (DynPLTRelRegion.Size) { - if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) { - printDynamicRelocHeader(ELF::SHT_RELA, "PLT", DynPLTRelRegion); - for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) + if (this->DynPLTRelRegion.Size) { + if (this->DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) { + printDynamicRelocHeader(ELF::SHT_RELA, "PLT", this->DynPLTRelRegion); + for (const Elf_Rela &Rela : + this->DynPLTRelRegion.getAsArrayRef()) printDynamicReloc(Relocation(Rela, IsMips64EL)); } else { - printDynamicRelocHeader(ELF::SHT_REL, "PLT", DynPLTRelRegion); - for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) + printDynamicRelocHeader(ELF::SHT_REL, "PLT", this->DynPLTRelRegion); + for (const Elf_Rel &Rel : this->DynPLTRelRegion.getAsArrayRef()) printDynamicReloc(Relocation(Rel, IsMips64EL)); } } } template -void GNUStyle::printGNUVersionSectionProlog( +void GNUELFDumper::printGNUVersionSectionProlog( const typename ELFT::Shdr &Sec, const Twine &Label, unsigned EntriesNum) { // Don't inline the SecName, because it might report a warning to stderr and // corrupt the output. 
@@ -4722,7 +4528,7 @@ void GNUStyle::printGNUVersionSectionProlog( LinkedSecName = this->getPrintableSectionName(**LinkedSecOrErr); else this->reportUniqueWarning("invalid section linked to " + - describe(this->Obj, Sec) + ": " + + this->describe(Sec) + ": " + toString(LinkedSecOrErr.takeError())); OS << " Addr: " << format_hex_no_prefix(Sec.sh_addr, 16) @@ -4731,15 +4537,15 @@ void GNUStyle::printGNUVersionSectionProlog( } template -void GNUStyle::printVersionSymbolSection(const Elf_Shdr *Sec) { +void GNUELFDumper::printVersionSymbolSection(const Elf_Shdr *Sec) { if (!Sec) return; printGNUVersionSectionProlog(*Sec, "Version symbols", Sec->sh_size / sizeof(Elf_Versym)); Expected> VerTableOrErr = - this->dumper().getVersionTable(*Sec, /*SymTab=*/nullptr, - /*StrTab=*/nullptr); + this->getVersionTable(*Sec, /*SymTab=*/nullptr, + /*StrTab=*/nullptr); if (!VerTableOrErr) { this->reportUniqueWarning(VerTableOrErr.takeError()); return; @@ -4756,10 +4562,10 @@ void GNUStyle::printVersionSymbolSection(const Elf_Shdr *Sec) { bool IsDefault; Expected NameOrErr = - this->dumper().getSymbolVersionByIndex(Ndx, IsDefault); + this->getSymbolVersionByIndex(Ndx, IsDefault); if (!NameOrErr) { this->reportUniqueWarning("unable to get a version for entry " + - Twine(I) + " of " + describe(this->Obj, *Sec) + + Twine(I) + " of " + this->describe(*Sec) + ": " + toString(NameOrErr.takeError())); Versions.emplace_back(""); continue; @@ -4804,13 +4610,13 @@ static std::string versionFlagToString(unsigned Flags) { } template -void GNUStyle::printVersionDefinitionSection(const Elf_Shdr *Sec) { +void GNUELFDumper::printVersionDefinitionSection(const Elf_Shdr *Sec) { if (!Sec) return; printGNUVersionSectionProlog(*Sec, "Version definition", Sec->sh_info); - Expected> V = this->dumper().getVersionDefinitions(*Sec); + Expected> V = this->getVersionDefinitions(*Sec); if (!V) { this->reportUniqueWarning(V.takeError()); return; @@ -4831,15 +4637,14 @@ void 
GNUStyle::printVersionDefinitionSection(const Elf_Shdr *Sec) { } template -void GNUStyle::printVersionDependencySection(const Elf_Shdr *Sec) { +void GNUELFDumper::printVersionDependencySection(const Elf_Shdr *Sec) { if (!Sec) return; unsigned VerneedNum = Sec->sh_info; printGNUVersionSectionProlog(*Sec, "Version needs", VerneedNum); - Expected> V = - this->dumper().getVersionDependencies(*Sec); + Expected> V = this->getVersionDependencies(*Sec); if (!V) { this->reportUniqueWarning(V.takeError()); return; @@ -4857,7 +4662,7 @@ void GNUStyle::printVersionDependencySection(const Elf_Shdr *Sec) { } template -void GNUStyle::printHashHistogram(const Elf_Hash &HashTable) { +void GNUELFDumper::printHashHistogram(const Elf_Hash &HashTable) { size_t NBucket = HashTable.nbucket; size_t NChain = HashTable.nchain; ArrayRef Buckets = HashTable.buckets(); @@ -4912,9 +4717,10 @@ void GNUStyle::printHashHistogram(const Elf_Hash &HashTable) { } template -void GNUStyle::printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) { - Expected> ChainsOrErr = getGnuHashTableChains( - this->dumper().getDynSymRegion(), &GnuHashTable); +void GNUELFDumper::printGnuHashHistogram( + const Elf_GnuHash &GnuHashTable) { + Expected> ChainsOrErr = + getGnuHashTableChains(this->DynSymRegion, &GnuHashTable); if (!ChainsOrErr) { this->reportUniqueWarning("unable to print the GNU hash table histogram: " + toString(ChainsOrErr.takeError())); @@ -4969,25 +4775,25 @@ void GNUStyle::printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) { // dynamic symbol table. The table shows the number of hash buckets for // different lengths of chains as an absolute number and percentage of the total // buckets, and the cumulative coverage of symbols for each set of buckets. -template void GNUStyle::printHashHistograms() { +template void GNUELFDumper::printHashHistograms() { // Print histogram for the .hash section. 
- if (const Elf_Hash *HashTable = this->dumper().getHashTable()) { - if (Error E = checkHashTable(this->dumper(), HashTable)) + if (this->HashTable) { + if (Error E = checkHashTable(*this, this->HashTable)) this->reportUniqueWarning(std::move(E)); else - printHashHistogram(*HashTable); + printHashHistogram(*this->HashTable); } // Print histogram for the .gnu.hash section. - if (const Elf_GnuHash *GnuHashTable = this->dumper().getGnuHashTable()) { - if (Error E = checkGNUHashTable(this->Obj, GnuHashTable)) + if (this->GnuHashTable) { + if (Error E = checkGNUHashTable(this->Obj, this->GnuHashTable)) this->reportUniqueWarning(std::move(E)); else - printGnuHashHistogram(*GnuHashTable); + printGnuHashHistogram(*this->GnuHashTable); } } -template void GNUStyle::printCGProfile() { +template void GNUELFDumper::printCGProfile() { OS << "GNUStyle::printCGProfile not implemented\n"; } @@ -5021,19 +4827,18 @@ decodeAddrsigSection(const ELFFile &Obj, const typename ELFT::Shdr &Sec) { toString(SymsOrErr.takeError())); } -template void GNUStyle::printAddrsig() { - const Elf_Shdr *Sec = this->dumper().getDotAddrsigSec(); - if (!Sec) +template void GNUELFDumper::printAddrsig() { + if (!this->DotAddrsigSec) return; Expected> SymsOrErr = - decodeAddrsigSection(this->Obj, *Sec); + decodeAddrsigSection(this->Obj, *this->DotAddrsigSec); if (!SymsOrErr) { this->reportUniqueWarning(SymsOrErr.takeError()); return; } - StringRef Name = this->getPrintableSectionName(*Sec); + StringRef Name = this->getPrintableSectionName(*this->DotAddrsigSec); OS << "\nAddress-significant symbols section '" << Name << "'" << " contains " << SymsOrErr->size() << " entries:\n"; OS << " Num: Name\n"; @@ -5042,7 +4847,7 @@ template void GNUStyle::printAddrsig() { size_t SymIndex = 0; for (uint64_t Sym : *SymsOrErr) { Fields[0].Str = to_string(format_decimal(++SymIndex, 6)) + ":"; - Fields[1].Str = this->dumper().getStaticSymbolName(Sym); + Fields[1].Str = this->getStaticSymbolName(Sym); for (const Field &Entry : 
Fields) printField(Entry); OS << "\n"; @@ -5601,7 +5406,7 @@ static void printNotesHelper( } } -template void GNUStyle::printNotes() { +template void GNUELFDumper::printNotes() { auto PrintHeader = [&](Optional SecName, const typename ELFT::Off Offset, const typename ELFT::Addr Size) { @@ -5663,15 +5468,15 @@ template void GNUStyle::printNotes() { return Error::success(); }; - printNotesHelper(this->dumper(), PrintHeader, ProcessNote, []() {}); + printNotesHelper(*this, PrintHeader, ProcessNote, []() {}); } -template void GNUStyle::printELFLinkerOptions() { +template void GNUELFDumper::printELFLinkerOptions() { OS << "printELFLinkerOptions not implemented!\n"; } template -void DumpStyle::printDependentLibsHelper( +void ELFDumper::printDependentLibsHelper( function_ref OnSectionStart, function_ref OnLibEntry) { auto Warn = [this](unsigned SecNdx, StringRef Msg) { @@ -5708,7 +5513,7 @@ void DumpStyle::printDependentLibsHelper( } template -void DumpStyle::forEachRelocationDo( +void ELFDumper::forEachRelocationDo( const Elf_Shdr &Sec, bool RawRelr, llvm::function_ref &, unsigned, const Elf_Shdr &, const Elf_Shdr *)> @@ -5716,7 +5521,7 @@ void DumpStyle::forEachRelocationDo( llvm::function_ref RelrFn) { auto Warn = [&](Error &&E, const Twine &Prefix = "unable to read relocations from") { - this->reportUniqueWarning(Prefix + " " + describe(Obj, Sec) + ": " + + this->reportUniqueWarning(Prefix + " " + describe(Sec) + ": " + toString(std::move(E))); }; @@ -5783,19 +5588,18 @@ void DumpStyle::forEachRelocationDo( } template -StringRef DumpStyle::getPrintableSectionName(const Elf_Shdr &Sec) const { +StringRef ELFDumper::getPrintableSectionName(const Elf_Shdr &Sec) const { StringRef Name = ""; if (Expected SecNameOrErr = - Obj.getSectionName(Sec, this->dumper().WarningHandler)) + Obj.getSectionName(Sec, this->WarningHandler)) Name = *SecNameOrErr; else - this->reportUniqueWarning("unable to get the name of " + - describe(Obj, Sec) + ": " + - 
toString(SecNameOrErr.takeError())); + this->reportUniqueWarning("unable to get the name of " + describe(Sec) + + ": " + toString(SecNameOrErr.takeError())); return Name; } -template void GNUStyle::printDependentLibs() { +template void GNUELFDumper::printDependentLibs() { bool SectionStarted = false; struct NameOffset { StringRef Name; @@ -5831,12 +5635,12 @@ template void GNUStyle::printDependentLibs() { } template -bool DumpStyle::printFunctionStackSize( +bool ELFDumper::printFunctionStackSize( uint64_t SymValue, Optional FunctionSec, const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset) { uint32_t FuncSymIndex = 0; - if (const Elf_Shdr *SymTab = this->dumper().getDotSymtabSec()) { - if (Expected SymsOrError = Obj.symbols(SymTab)) { + if (this->DotSymtabSec) { + if (Expected SymsOrError = Obj.symbols(this->DotSymtabSec)) { uint32_t Index = (uint32_t)-1; for (const Elf_Sym &Sym : *SymsOrError) { ++Index; @@ -5845,11 +5649,11 @@ bool DumpStyle::printFunctionStackSize( continue; if (Expected SymAddrOrErr = - ElfObj.toSymbolRef(SymTab, Index).getAddress()) { + ObjF.toSymbolRef(this->DotSymtabSec, Index).getAddress()) { if (SymValue != *SymAddrOrErr) continue; } else { - std::string Name = this->dumper().getStaticSymbolName(Index); + std::string Name = this->getStaticSymbolName(Index); reportUniqueWarning("unable to get address of symbol '" + Name + "': " + toString(SymAddrOrErr.takeError())); break; @@ -5859,11 +5663,11 @@ bool DumpStyle::printFunctionStackSize( // means "any section". if (FunctionSec) { if (Expected SecOrErr = - Obj.getSection(Sym, SymTab, this->dumper().getShndxTable())) { + Obj.getSection(Sym, this->DotSymtabSec, this->ShndxTable)) { if (*FunctionSec != *SecOrErr) continue; } else { - std::string Name = this->dumper().getStaticSymbolName(Index); + std::string Name = this->getStaticSymbolName(Index); // Note: it is impossible to trigger this error currently, it is // untested. 
reportUniqueWarning("unable to get section of symbol '" + Name + @@ -5885,9 +5689,9 @@ bool DumpStyle::printFunctionStackSize( if (!FuncSymIndex) reportUniqueWarning( "could not identify function symbol for stack size entry in " + - describe(Obj, StackSizeSec)); + describe(StackSizeSec)); else - FuncName = this->dumper().getStaticSymbolName(FuncSymIndex); + FuncName = this->getStaticSymbolName(FuncSymIndex); // Extract the size. The expectation is that Offset is pointing to the right // place, i.e. past the function address. @@ -5895,7 +5699,7 @@ bool DumpStyle::printFunctionStackSize( uint64_t StackSize = Data.getULEB128(Offset, &Err); if (Err) { reportUniqueWarning("could not extract a valid stack size from " + - describe(Obj, StackSizeSec) + ": " + + describe(StackSizeSec) + ": " + toString(std::move(Err))); return false; } @@ -5904,7 +5708,8 @@ bool DumpStyle::printFunctionStackSize( } template -void GNUStyle::printStackSizeEntry(uint64_t Size, StringRef FuncName) { +void GNUELFDumper::printStackSizeEntry(uint64_t Size, + StringRef FuncName) { OS.PadToColumn(2); OS << format_decimal(Size, 11); OS.PadToColumn(18); @@ -5912,7 +5717,7 @@ void GNUStyle::printStackSizeEntry(uint64_t Size, StringRef FuncName) { } template -void DumpStyle::printStackSize(const Relocation &R, +void ELFDumper::printStackSize(const Relocation &R, const Elf_Shdr &RelocSec, unsigned Ndx, const Elf_Shdr *SymTab, const Elf_Shdr *FunctionSec, @@ -5922,11 +5727,10 @@ void DumpStyle::printStackSize(const Relocation &R, // This function ignores potentially erroneous input, unless it is directly // related to stack size reporting. 
const Elf_Sym *Sym = nullptr; - Expected> TargetOrErr = - this->dumper().getRelocationTarget(R, SymTab); + Expected> TargetOrErr = this->getRelocationTarget(R, SymTab); if (!TargetOrErr) reportUniqueWarning("unable to get the target of relocation with index " + - Twine(Ndx) + " in " + describe(Obj, RelocSec) + ": " + + Twine(Ndx) + " in " + describe(RelocSec) + ": " + toString(TargetOrErr.takeError())); else Sym = TargetOrErr->Sym; @@ -5934,7 +5738,7 @@ void DumpStyle::printStackSize(const Relocation &R, uint64_t RelocSymValue = 0; if (Sym) { Expected SectionOrErr = - this->Obj.getSection(*Sym, SymTab, this->dumper().getShndxTable()); + this->Obj.getSection(*Sym, SymTab, this->ShndxTable); if (!SectionOrErr) { reportUniqueWarning( "cannot identify the section for relocation symbol '" + @@ -5954,7 +5758,7 @@ void DumpStyle::printStackSize(const Relocation &R, if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { reportUniqueWarning("found invalid relocation offset (0x" + Twine::utohexstr(Offset) + ") into " + - describe(Obj, StackSizeSec) + + describe(StackSizeSec) + " while trying to extract a stack size entry"); return; } @@ -5967,7 +5771,7 @@ void DumpStyle::printStackSize(const Relocation &R, } template -void DumpStyle::printNonRelocatableStackSizes( +void ELFDumper::printNonRelocatableStackSizes( std::function PrintHeader) { // This function ignores potentially erroneous input, unless it is directly // related to stack size reporting. @@ -5984,7 +5788,7 @@ void DumpStyle::printNonRelocatableStackSizes( // size. Check for an extra byte before we try to process the entry. 
if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { reportUniqueWarning( - describe(Obj, Sec) + + describe(Sec) + " ended while trying to extract a stack size entry"); break; } @@ -5997,7 +5801,7 @@ void DumpStyle::printNonRelocatableStackSizes( } template -void DumpStyle::printRelocatableStackSizes( +void ELFDumper::printRelocatableStackSizes( std::function PrintHeader) { // Build a map between stack size sections and their corresponding relocation // sections. @@ -6024,7 +5828,7 @@ void DumpStyle::printRelocatableStackSizes( Expected RelSecOrErr = Obj.getSection(Sec.sh_info); if (!RelSecOrErr) { - reportUniqueWarning(describe(Obj, Sec) + + reportUniqueWarning(describe(Sec) + ": failed to get a relocated section: " + toString(RelSecOrErr.takeError())); continue; @@ -6045,8 +5849,7 @@ void DumpStyle::printRelocatableStackSizes( // Warn about stack size sections without a relocation section. if (!RelocSec) { - reportWarning(createError(".stack_sizes (" + - describe(Obj, *StackSizesELFSec) + + reportWarning(createError(".stack_sizes (" + describe(*StackSizesELFSec) + ") does not have a corresponding " "relocation section"), FileName); @@ -6061,7 +5864,7 @@ void DumpStyle::printRelocatableStackSizes( SupportsRelocation IsSupportedFn; RelocationResolver Resolver; - std::tie(IsSupportedFn, Resolver) = getRelocationResolver(ElfObj); + std::tie(IsSupportedFn, Resolver) = getRelocationResolver(this->ObjF); ArrayRef Contents = unwrapOrError(this->FileName, Obj.getSectionContents(*StackSizesELFSec)); DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr)); @@ -6072,7 +5875,7 @@ void DumpStyle::printRelocatableStackSizes( const Elf_Shdr *SymTab) { if (!IsSupportedFn || !IsSupportedFn(R.Type)) { reportUniqueWarning( - describe(Obj, *RelocSec) + + describe(*RelocSec) + " contains an unsupported relocation with index " + Twine(Ndx) + ": " + Obj.getRelocationTypeName(R.Type)); return; @@ -6089,7 +5892,7 @@ void DumpStyle::printRelocatableStackSizes( } 
template -void GNUStyle::printStackSizes() { +void GNUELFDumper::printStackSizes() { bool HeaderHasBeenPrinted = false; auto PrintHeader = [&]() { if (HeaderHasBeenPrinted) @@ -6111,7 +5914,7 @@ void GNUStyle::printStackSizes() { } template -void GNUStyle::printMipsGOT(const MipsGOTParser &Parser) { +void GNUELFDumper::printMipsGOT(const MipsGOTParser &Parser) { size_t Bias = ELFT::Is64Bits ? 8 : 0; auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) { OS.PadToColumn(2); @@ -6161,9 +5964,9 @@ void GNUStyle::printMipsGOT(const MipsGOTParser &Parser) { OS << " Address Access Initial Sym.Val. Type Ndx Name\n"; for (auto &E : Parser.getGlobalEntries()) { const Elf_Sym &Sym = *Parser.getGotSym(&E); - const Elf_Sym &FirstSym = this->dumper().dynamic_symbols()[0]; - std::string SymName = this->dumper().getFullSymbolName( - Sym, &Sym - &FirstSym, this->dumper().getDynamicStringTable(), false); + const Elf_Sym &FirstSym = this->dynamic_symbols()[0]; + std::string SymName = this->getFullSymbolName( + Sym, &Sym - &FirstSym, this->DynamicStringTable, false); OS.PadToColumn(2); OS << to_string(format_hex_no_prefix(Parser.getGotAddress(&E), 8 + Bias)); @@ -6176,8 +5979,7 @@ void GNUStyle::printMipsGOT(const MipsGOTParser &Parser) { OS.PadToColumn(40 + 3 * Bias); OS << printEnum(Sym.getType(), makeArrayRef(ElfSymbolTypes)); OS.PadToColumn(48 + 3 * Bias); - OS << getSymbolSectionNdx( - Sym, &Sym - this->dumper().dynamic_symbols().begin()); + OS << getSymbolSectionNdx(Sym, &Sym - this->dynamic_symbols().begin()); OS.PadToColumn(52 + 3 * Bias); OS << SymName << "\n"; } @@ -6189,7 +5991,7 @@ void GNUStyle::printMipsGOT(const MipsGOTParser &Parser) { } template -void GNUStyle::printMipsPLT(const MipsGOTParser &Parser) { +void GNUELFDumper::printMipsPLT(const MipsGOTParser &Parser) { size_t Bias = ELFT::Is64Bits ? 
8 : 0; auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) { OS.PadToColumn(2); @@ -6216,8 +6018,8 @@ void GNUStyle::printMipsPLT(const MipsGOTParser &Parser) { const Elf_Sym &Sym = *Parser.getPltSym(&E); const Elf_Sym &FirstSym = *cantFail( this->Obj.template getEntry(*Parser.getPltSymTable(), 0)); - std::string SymName = this->dumper().getFullSymbolName( - Sym, &Sym - &FirstSym, this->dumper().getDynamicStringTable(), false); + std::string SymName = this->getFullSymbolName( + Sym, &Sym - &FirstSym, this->DynamicStringTable, false); OS.PadToColumn(2); OS << to_string(format_hex_no_prefix(Parser.getPltAddress(&E), 8 + Bias)); @@ -6228,8 +6030,7 @@ void GNUStyle::printMipsPLT(const MipsGOTParser &Parser) { OS.PadToColumn(29 + 3 * Bias); OS << printEnum(Sym.getType(), makeArrayRef(ElfSymbolTypes)); OS.PadToColumn(37 + 3 * Bias); - OS << getSymbolSectionNdx( - Sym, &Sym - this->dumper().dynamic_symbols().begin()); + OS << getSymbolSectionNdx(Sym, &Sym - this->dynamic_symbols().begin()); OS.PadToColumn(41 + 3 * Bias); OS << SymName << "\n"; } @@ -6255,10 +6056,10 @@ getMipsAbiFlagsSection(const ELFDumper &Dumper) { return reinterpret_cast *>(DataOrErr->data()); } -template void GNUStyle::printMipsABIFlags() { +template void GNUELFDumper::printMipsABIFlags() { const Elf_Mips_ABIFlags *Flags = nullptr; if (Expected *> SecOrErr = - getMipsAbiFlagsSection(this->dumper())) + getMipsAbiFlagsSection(*this)) Flags = *SecOrErr; else this->reportUniqueWarning(SecOrErr.takeError()); @@ -6288,7 +6089,7 @@ template void GNUStyle::printMipsABIFlags() { OS << "\n"; } -template void LLVMStyle::printFileHeaders() { +template void LLVMELFDumper::printFileHeaders() { const Elf_Ehdr &E = this->Obj.getHeader(); { DictScope D(W, "ElfHeader"); @@ -6360,7 +6161,7 @@ template void LLVMStyle::printFileHeaders() { } } -template void LLVMStyle::printGroupSections() { +template void LLVMELFDumper::printGroupSections() { DictScope Lists(W, "Groups"); std::vector V = this->getGroups(); 
DenseMap Map = mapSectionsToGroups(V); @@ -6391,7 +6192,7 @@ template void LLVMStyle::printGroupSections() { W.startLine() << "There are no group sections in the file.\n"; } -template void LLVMStyle::printRelocations() { +template void LLVMELFDumper::printRelocations() { ListScope D(W, "Relocations"); for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { @@ -6408,28 +6209,15 @@ template void LLVMStyle::printRelocations() { } } -template void LLVMStyle::printRelrReloc(const Elf_Relr &R) { - W.startLine() << W.hex(R) << "\n"; -} - template -void LLVMStyle::printReloc(const Relocation &R, unsigned RelIndex, - const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { - Expected> Target = - this->dumper().getRelocationTarget(R, SymTab); - if (!Target) { - this->reportUniqueWarning("unable to print relocation " + Twine(RelIndex) + - " in " + describe(this->Obj, Sec) + ": " + - toString(Target.takeError())); - return; - } - - printRelRelaReloc(R, Target->Name); +void LLVMELFDumper::printRelrReloc(const Elf_Relr &R) { + W.startLine() << W.hex(R) << "\n"; } template -void LLVMStyle::printRelRelaReloc(const Relocation &R, - StringRef SymbolName) { +void LLVMELFDumper::printRelRelaReloc(const Relocation &R, + const RelSymbol &RelSym) { + StringRef SymbolName = RelSym.Name; SmallString<32> RelocName; this->Obj.getRelocationTypeName(R.Type, RelocName); @@ -6450,7 +6238,7 @@ void LLVMStyle::printRelRelaReloc(const Relocation &R, } } -template void LLVMStyle::printSectionHeaders() { +template void LLVMELFDumper::printSectionHeaders() { ListScope SectionsD(W, "Sections"); int SectionIndex = -1; @@ -6480,16 +6268,17 @@ template void LLVMStyle::printSectionHeaders() { if (opts::SectionSymbols) { ListScope D(W, "Symbols"); - if (const Elf_Shdr *Symtab = this->dumper().getDotSymtabSec()) { + if (this->DotSymtabSec) { StringRef StrTable = unwrapOrError( - this->FileName, this->Obj.getStringTableForSymtab(*Symtab)); + this->FileName, + 
this->Obj.getStringTableForSymtab(*this->DotSymtabSec)); - typename ELFT::SymRange Symbols = - unwrapOrError(this->FileName, this->Obj.symbols(Symtab)); + typename ELFT::SymRange Symbols = unwrapOrError( + this->FileName, this->Obj.symbols(this->DotSymtabSec)); for (const Elf_Sym &Sym : Symbols) { const Elf_Shdr *SymSec = unwrapOrError( - this->FileName, this->Obj.getSection( - Sym, Symtab, this->dumper().getShndxTable())); + this->FileName, + this->Obj.getSection(Sym, this->DotSymtabSec, this->ShndxTable)); if (SymSec == &Sec) printSymbol(Sym, &Sym - &Symbols[0], StrTable, false, false); } @@ -6507,8 +6296,8 @@ template void LLVMStyle::printSectionHeaders() { } template -void LLVMStyle::printSymbolSection(const Elf_Sym &Symbol, - unsigned SymIndex) { +void LLVMELFDumper::printSymbolSection(const Elf_Sym &Symbol, + unsigned SymIndex) const { auto GetSectionSpecialType = [&]() -> Optional { if (Symbol.isUndefined()) return StringRef("Undefined"); @@ -6531,7 +6320,7 @@ void LLVMStyle::printSymbolSection(const Elf_Sym &Symbol, } Expected SectionIndex = - this->dumper().getSymbolSectionIndex(Symbol, SymIndex); + this->getSymbolSectionIndex(Symbol, SymIndex); if (!SectionIndex) { assert(Symbol.st_shndx == SHN_XINDEX && "getSymbolSectionIndex should only fail due to an invalid " @@ -6542,11 +6331,11 @@ void LLVMStyle::printSymbolSection(const Elf_Sym &Symbol, } Expected SectionName = - this->dumper().getSymbolSectionName(Symbol, *SectionIndex); + this->getSymbolSectionName(Symbol, *SectionIndex); if (!SectionName) { // Don't report an invalid section name if the section headers are missing. // In such situations, all sections will be "invalid". 
- if (!this->dumper().getElfObject().sections().empty()) + if (!this->ObjF.sections().empty()) this->reportUniqueWarning(SectionName.takeError()); else consumeError(SectionName.takeError()); @@ -6557,11 +6346,12 @@ void LLVMStyle::printSymbolSection(const Elf_Sym &Symbol, } template -void LLVMStyle::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, - Optional StrTable, bool IsDynamic, - bool /*NonVisibilityBitsUsed*/) { +void LLVMELFDumper::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, + Optional StrTable, + bool IsDynamic, + bool /*NonVisibilityBitsUsed*/) const { std::string FullSymbolName = - this->dumper().getFullSymbolName(Symbol, SymIndex, StrTable, IsDynamic); + this->getFullSymbolName(Symbol, SymIndex, StrTable, IsDynamic); unsigned char SymbolType = Symbol.getType(); DictScope D(W, "Symbol"); @@ -6604,26 +6394,20 @@ void LLVMStyle::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, } template -void LLVMStyle::printSymbols(bool PrintSymbols, - bool PrintDynamicSymbols) { - if (PrintSymbols) - printSymbols(); - if (PrintDynamicSymbols) - printDynamicSymbols(); -} - -template void LLVMStyle::printSymbols() { - ListScope Group(W, "Symbols"); - this->dumper().printSymbolsHelper(false); -} - -template void LLVMStyle::printDynamicSymbols() { - ListScope Group(W, "DynamicSymbols"); - this->dumper().printSymbolsHelper(true); +void LLVMELFDumper::printSymbols(bool PrintSymbols, + bool PrintDynamicSymbols) { + if (PrintSymbols) { + ListScope Group(W, "Symbols"); + this->printSymbolsHelper(false); + } + if (PrintDynamicSymbols) { + ListScope Group(W, "DynamicSymbols"); + this->printSymbolsHelper(true); + } } -template void LLVMStyle::printDynamic() { - Elf_Dyn_Range Table = this->dumper().dynamic_table(); +template void LLVMELFDumper::printDynamicTable() { + Elf_Dyn_Range Table = this->dynamic_table(); if (Table.empty()) return; @@ -6639,7 +6423,7 @@ template void LLVMStyle::printDynamic() { std::string ValueFmt = "%-" + std::to_string(MaxTagSize) + "s 
"; for (auto Entry : Table) { uintX_t Tag = Entry.getTag(); - std::string Value = this->dumper().getDynamicEntry(Tag, Entry.getVal()); + std::string Value = this->getDynamicEntry(Tag, Entry.getVal()); W.startLine() << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10, true) << " " << format(ValueFmt.c_str(), @@ -6649,7 +6433,7 @@ template void LLVMStyle::printDynamic() { W.startLine() << "]\n"; } -template void LLVMStyle::printDynamicRelocations() { +template void LLVMELFDumper::printDynamicRelocations() { W.startLine() << "Dynamic Relocations {\n"; W.indent(); this->printDynamicRelocationsHelper(); @@ -6658,13 +6442,7 @@ template void LLVMStyle::printDynamicRelocations() { } template -void LLVMStyle::printDynamicReloc(const Relocation &R) { - RelSymbol S = getSymbolForReloc(this->dumper(), R); - printRelRelaReloc(R, S.Name); -} - -template -void LLVMStyle::printProgramHeaders( +void LLVMELFDumper::printProgramHeaders( bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { if (PrintProgramHeaders) printProgramHeaders(); @@ -6672,7 +6450,7 @@ void LLVMStyle::printProgramHeaders( printSectionMapping(); } -template void LLVMStyle::printProgramHeaders() { +template void LLVMELFDumper::printProgramHeaders() { ListScope L(W, "ProgramHeaders"); Expected> PhdrsOrErr = this->Obj.program_headers(); @@ -6699,7 +6477,7 @@ template void LLVMStyle::printProgramHeaders() { } template -void LLVMStyle::printVersionSymbolSection(const Elf_Shdr *Sec) { +void LLVMELFDumper::printVersionSymbolSection(const Elf_Shdr *Sec) { ListScope SS(W, "VersionSymbols"); if (!Sec) return; @@ -6707,7 +6485,7 @@ void LLVMStyle::printVersionSymbolSection(const Elf_Shdr *Sec) { StringRef StrTable; ArrayRef Syms; Expected> VerTableOrErr = - this->dumper().getVersionTable(*Sec, &Syms, &StrTable); + this->getVersionTable(*Sec, &Syms, &StrTable); if (!VerTableOrErr) { this->reportUniqueWarning(VerTableOrErr.takeError()); return; @@ -6719,7 +6497,7 @@ void LLVMStyle::printVersionSymbolSection(const 
Elf_Shdr *Sec) { for (size_t I = 0, E = Syms.size(); I < E; ++I) { DictScope S(W, "Symbol"); W.printNumber("Version", (*VerTableOrErr)[I].vs_index & VERSYM_VERSION); - W.printString("Name", this->dumper().getFullSymbolName(Syms[I], I, StrTable, + W.printString("Name", this->getFullSymbolName(Syms[I], I, StrTable, /*IsDynamic=*/true)); } } @@ -6730,12 +6508,12 @@ static const EnumEntry SymVersionFlags[] = { {"Info", "INFO", VER_FLG_INFO}}; template -void LLVMStyle::printVersionDefinitionSection(const Elf_Shdr *Sec) { +void LLVMELFDumper::printVersionDefinitionSection(const Elf_Shdr *Sec) { ListScope SD(W, "VersionDefinitions"); if (!Sec) return; - Expected> V = this->dumper().getVersionDefinitions(*Sec); + Expected> V = this->getVersionDefinitions(*Sec); if (!V) { this->reportUniqueWarning(V.takeError()); return; @@ -6755,12 +6533,12 @@ void LLVMStyle::printVersionDefinitionSection(const Elf_Shdr *Sec) { } template -void LLVMStyle::printVersionDependencySection(const Elf_Shdr *Sec) { +void LLVMELFDumper::printVersionDependencySection(const Elf_Shdr *Sec) { ListScope SD(W, "VersionRequirements"); if (!Sec) return; - Expected> V = this->dumper().getVersionDependencies(*Sec); + Expected> V = this->getVersionDependencies(*Sec); if (!V) { this->reportUniqueWarning(V.takeError()); return; @@ -6783,18 +6561,18 @@ void LLVMStyle::printVersionDependencySection(const Elf_Shdr *Sec) { } } -template void LLVMStyle::printHashHistograms() { +template void LLVMELFDumper::printHashHistograms() { W.startLine() << "Hash Histogram not implemented!\n"; } -template void LLVMStyle::printCGProfile() { +template void LLVMELFDumper::printCGProfile() { ListScope L(W, "CGProfile"); - if (!this->dumper().getDotCGProfileSec()) + if (!this->DotCGProfileSec) return; Expected> CGProfileOrErr = this->Obj.template getSectionContentsAsArray( - *this->dumper().getDotCGProfileSec()); + *this->DotCGProfileSec); if (!CGProfileOrErr) { this->reportUniqueWarning( "unable to dump the 
SHT_LLVM_CALL_GRAPH_PROFILE section: " + @@ -6804,29 +6582,28 @@ template void LLVMStyle::printCGProfile() { for (const Elf_CGProfile &CGPE : *CGProfileOrErr) { DictScope D(W, "CGProfileEntry"); - W.printNumber("From", this->dumper().getStaticSymbolName(CGPE.cgp_from), + W.printNumber("From", this->getStaticSymbolName(CGPE.cgp_from), CGPE.cgp_from); - W.printNumber("To", this->dumper().getStaticSymbolName(CGPE.cgp_to), + W.printNumber("To", this->getStaticSymbolName(CGPE.cgp_to), CGPE.cgp_to); W.printNumber("Weight", CGPE.cgp_weight); } } -template void LLVMStyle::printAddrsig() { +template void LLVMELFDumper::printAddrsig() { ListScope L(W, "Addrsig"); - const Elf_Shdr *Sec = this->dumper().getDotAddrsigSec(); - if (!Sec) + if (!this->DotAddrsigSec) return; Expected> SymsOrErr = - decodeAddrsigSection(this->Obj, *Sec); + decodeAddrsigSection(this->Obj, *this->DotAddrsigSec); if (!SymsOrErr) { this->reportUniqueWarning(SymsOrErr.takeError()); return; } for (uint64_t Sym : *SymsOrErr) - W.printNumber("Sym", this->dumper().getStaticSymbolName(Sym), Sym); + W.printNumber("Sym", this->getStaticSymbolName(Sym), Sym); } template @@ -6871,7 +6648,7 @@ static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) { } } -template void LLVMStyle::printNotes() { +template void LLVMELFDumper::printNotes() { ListScope L(W, "Notes"); std::unique_ptr NoteScope; @@ -6932,10 +6709,10 @@ template void LLVMStyle::printNotes() { return Error::success(); }; - printNotesHelper(this->dumper(), StartNotes, ProcessNote, EndNotes); + printNotesHelper(*this, StartNotes, ProcessNote, EndNotes); } -template void LLVMStyle::printELFLinkerOptions() { +template void LLVMELFDumper::printELFLinkerOptions() { ListScope L(W, "LinkerOptions"); unsigned I = -1; @@ -6979,15 +6756,14 @@ template void LLVMStyle::printELFLinkerOptions() { } } -template void LLVMStyle::printDependentLibs() { +template void LLVMELFDumper::printDependentLibs() { ListScope L(W, "DependentLibs"); 
this->printDependentLibsHelper( [](const Elf_Shdr &) {}, [this](StringRef Lib, uint64_t) { W.printString(Lib); }); } -template -void LLVMStyle::printStackSizes() { +template void LLVMELFDumper::printStackSizes() { ListScope L(W, "StackSizes"); if (this->Obj.getHeader().e_type == ELF::ET_REL) this->printRelocatableStackSizes([]() {}); @@ -6996,14 +6772,14 @@ void LLVMStyle::printStackSizes() { } template -void LLVMStyle::printStackSizeEntry(uint64_t Size, StringRef FuncName) { +void LLVMELFDumper::printStackSizeEntry(uint64_t Size, StringRef FuncName) { DictScope D(W, "Entry"); W.printString("Function", FuncName); W.printHex("Size", Size); } template -void LLVMStyle::printMipsGOT(const MipsGOTParser &Parser) { +void LLVMELFDumper::printMipsGOT(const MipsGOTParser &Parser) { auto PrintEntry = [&](const Elf_Addr *E) { W.printHex("Address", Parser.getGotAddress(E)); W.printNumber("Access", Parser.getGotOffset(E)); @@ -7049,11 +6825,11 @@ void LLVMStyle::printMipsGOT(const MipsGOTParser &Parser) { W.printHex("Value", Sym.st_value); W.printEnum("Type", Sym.getType(), makeArrayRef(ElfSymbolTypes)); - const unsigned SymIndex = &Sym - this->dumper().dynamic_symbols().begin(); + const unsigned SymIndex = &Sym - this->dynamic_symbols().begin(); printSymbolSection(Sym, SymIndex); - std::string SymName = this->dumper().getFullSymbolName( - Sym, SymIndex, this->dumper().getDynamicStringTable(), true); + std::string SymName = this->getFullSymbolName( + Sym, SymIndex, this->DynamicStringTable, true); W.printNumber("Name", SymName, Sym.st_name); } } @@ -7063,7 +6839,7 @@ void LLVMStyle::printMipsGOT(const MipsGOTParser &Parser) { } template -void LLVMStyle::printMipsPLT(const MipsGOTParser &Parser) { +void LLVMELFDumper::printMipsPLT(const MipsGOTParser &Parser) { auto PrintEntry = [&](const Elf_Addr *E) { W.printHex("Address", Parser.getPltAddress(E)); W.printHex("Initial", *E); @@ -7094,21 +6870,21 @@ void LLVMStyle::printMipsPLT(const MipsGOTParser &Parser) { const Elf_Sym &Sym 
= *Parser.getPltSym(&E); W.printHex("Value", Sym.st_value); W.printEnum("Type", Sym.getType(), makeArrayRef(ElfSymbolTypes)); - printSymbolSection(Sym, &Sym - this->dumper().dynamic_symbols().begin()); + printSymbolSection(Sym, &Sym - this->dynamic_symbols().begin()); const Elf_Sym *FirstSym = cantFail( this->Obj.template getEntry(*Parser.getPltSymTable(), 0)); - std::string SymName = this->dumper().getFullSymbolName( + std::string SymName = this->getFullSymbolName( Sym, &Sym - FirstSym, Parser.getPltStrTable(), true); W.printNumber("Name", SymName, Sym.st_name); } } } -template void LLVMStyle::printMipsABIFlags() { +template void LLVMELFDumper::printMipsABIFlags() { const Elf_Mips_ABIFlags *Flags; if (Expected *> SecOrErr = - getMipsAbiFlagsSection(this->dumper())) { + getMipsAbiFlagsSection(*this)) { Flags = *SecOrErr; if (!Flags) { W.startLine() << "There is no .MIPS.abiflags section in the file.\n"; From 1e11402aa8e25d88c095a1c70fc87d2d9775186b Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 12 Jan 2021 13:01:15 +0300 Subject: [PATCH 07/86] [llvm-readobj] - Add 'override' to fix build bots. This should fix bots after landing D93900. 
An example of error is: /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp:883:8: warning: 'printSectionMapping' overrides a member function but is not marked 'override' [-Winconsistent-missing-override] void printSectionMapping() {} --- llvm/tools/llvm-readobj/ELFDumper.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index a09ee6d630d783..44608b8c9a06b2 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -831,8 +831,8 @@ template class GNUELFDumper : public ELFDumper { std::string getSymbolSectionNdx(const Elf_Sym &Symbol, unsigned SymIndex) const; - void printProgramHeaders(); - void printSectionMapping(); + void printProgramHeaders() override; + void printSectionMapping() override; void printGNUVersionSectionProlog(const typename ELFT::Shdr &Sec, const Twine &Label, unsigned EntriesNum); @@ -879,8 +879,8 @@ template class LLVMELFDumper : public ELFDumper { void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, Optional StrTable, bool IsDynamic, bool /*NonVisibilityBitsUsed*/) const override; - void printProgramHeaders(); - void printSectionMapping() {} + void printProgramHeaders() override; + void printSectionMapping() override {} void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; From cc91efdabee05f749cb42e45aef1b45431844ade Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 12 Jan 2021 13:09:49 +0300 Subject: [PATCH 08/86] [llvm-readobj] - An attempt to fix BB. This adds the `template` keyword for 'getAsArrayRef' calls. 
An example of error: /b/1/openmp-gcc-x86_64-linux-debian/llvm.src/llvm/tools/llvm-readobj/ELFDumper.cpp:4491:50: error: use 'template' keyword to treat 'getAsArrayRef' as a dependent template name for (const Elf_Rel &Rel : this->DynRelRegion.getAsArrayRef()) --- llvm/tools/llvm-readobj/ELFDumper.cpp | 28 +++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 44608b8c9a06b2..d18e1d416278a7 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -265,7 +265,7 @@ template class ELFDumper : public ObjDumper { // with a DT_NULL entry. However, sometimes the section content may // continue past the DT_NULL entry, so to dump the section correctly, // we first find the end of the entries by iterating over them. - Elf_Dyn_Range Table = DynamicTable.getAsArrayRef(); + Elf_Dyn_Range Table = DynamicTable.template getAsArrayRef(); size_t Size = 0; while (Size < Table.size()) @@ -278,7 +278,7 @@ template class ELFDumper : public ObjDumper { Elf_Sym_Range dynamic_symbols() const { if (!DynSymRegion) return Elf_Sym_Range(); - return DynSymRegion->getAsArrayRef(); + return DynSymRegion->template getAsArrayRef(); } const Elf_Shdr *findSectionByName(StringRef Name) const; @@ -1828,7 +1828,7 @@ void ELFDumper::loadDynamicTable() { sizeof(Elf_Dyn))); FromPhdr.SizePrintName = "PT_DYNAMIC size"; FromPhdr.EntSizePrintName = ""; - IsPhdrTableValid = !FromPhdr.getAsArrayRef().empty(); + IsPhdrTableValid = !FromPhdr.template getAsArrayRef().empty(); } // Locate the dynamic table described in a section header. 
@@ -1844,7 +1844,7 @@ void ELFDumper::loadDynamicTable() { FromSec = *RegOrErr; FromSec.Context = describe(*DynamicSec); FromSec.EntSizePrintName = ""; - IsSecTableValid = !FromSec.getAsArrayRef().empty(); + IsSecTableValid = !FromSec.template getAsArrayRef().empty(); } else { reportUniqueWarning("unable to read the dynamic table from " + describe(*DynamicSec) + ": " + @@ -2584,7 +2584,7 @@ getGnuHashTableChains(Optional DynSymRegion, return createError("no dynamic symbol table found"); ArrayRef DynSymTable = - DynSymRegion->getAsArrayRef(); + DynSymRegion->template getAsArrayRef(); size_t NumSyms = DynSymTable.size(); if (!NumSyms) return createError("the dynamic symbol table is empty"); @@ -4480,21 +4480,24 @@ void ELFDumper::printRelocationsHelper(const Elf_Shdr &Sec) { template void ELFDumper::printDynamicRelocationsHelper() { const bool IsMips64EL = this->Obj.isMips64EL(); - if ( this->DynRelaRegion.Size > 0) { - printDynamicRelocHeader(ELF::SHT_RELA, "RELA", this->DynRelaRegion); - for (const Elf_Rela &Rela : this->DynRelaRegion.getAsArrayRef()) + if (this->DynRelaRegion.Size > 0) { + printDynamicRelocHeader(ELF::SHT_RELA, "RELA", this->DynRelaRegion); + for (const Elf_Rela &Rela : + this->DynRelaRegion.template getAsArrayRef()) printDynamicReloc(Relocation(Rela, IsMips64EL)); } if (this->DynRelRegion.Size > 0) { printDynamicRelocHeader(ELF::SHT_REL, "REL", this->DynRelRegion); - for (const Elf_Rel &Rel : this->DynRelRegion.getAsArrayRef()) + for (const Elf_Rel &Rel : + this->DynRelRegion.template getAsArrayRef()) printDynamicReloc(Relocation(Rel, IsMips64EL)); } if (this->DynRelrRegion.Size > 0) { printDynamicRelocHeader(ELF::SHT_REL, "RELR", this->DynRelrRegion); - Elf_Relr_Range Relrs = this->DynRelrRegion.getAsArrayRef(); + Elf_Relr_Range Relrs = + this->DynRelrRegion.template getAsArrayRef(); for (const Elf_Rel &Rel : Obj.decode_relrs(Relrs)) printDynamicReloc(Relocation(Rel, IsMips64EL)); } @@ -4503,11 +4506,12 @@ template void 
ELFDumper::printDynamicRelocationsHelper() { if (this->DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) { printDynamicRelocHeader(ELF::SHT_RELA, "PLT", this->DynPLTRelRegion); for (const Elf_Rela &Rela : - this->DynPLTRelRegion.getAsArrayRef()) + this->DynPLTRelRegion.template getAsArrayRef()) printDynamicReloc(Relocation(Rela, IsMips64EL)); } else { printDynamicRelocHeader(ELF::SHT_REL, "PLT", this->DynPLTRelRegion); - for (const Elf_Rel &Rel : this->DynPLTRelRegion.getAsArrayRef()) + for (const Elf_Rel &Rel : + this->DynPLTRelRegion.template getAsArrayRef()) printDynamicReloc(Relocation(Rel, IsMips64EL)); } } From 4744478b99f1a99b2cdf7e4070a93086e5ae4800 Mon Sep 17 00:00:00 2001 From: Sourabh Singh Tomar Date: Tue, 12 Jan 2021 15:43:33 +0530 Subject: [PATCH 09/86] [mlir][openmp][NFCI] Rename `continuationIP` to `continuationBlock` Argument is a `Block` not a `point`. --- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 70e35c7c7997f5..87ec35cc1c42b0 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -412,11 +412,11 @@ ModuleTranslation::convertOmpParallel(Operation &opInst, LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::BasicBlock &continuationIP) { + llvm::BasicBlock &continuationBlock) { // ParallelOp has only one region associated with it. 
auto ®ion = cast(opInst).getRegion(); convertOmpOpRegions(region, "omp.par.region", valueMapping, blockMapping, - *codeGenIP.getBlock(), continuationIP, builder, + *codeGenIP.getBlock(), continuationBlock, builder, bodyGenStatus); }; @@ -517,11 +517,11 @@ LogicalResult ModuleTranslation::convertOmpMaster(Operation &opInst, LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::BasicBlock &continuationIP) { + llvm::BasicBlock &continuationBlock) { // MasterOp has only one region associated with it. auto ®ion = cast(opInst).getRegion(); convertOmpOpRegions(region, "omp.master.region", valueMapping, blockMapping, - *codeGenIP.getBlock(), continuationIP, builder, + *codeGenIP.getBlock(), continuationBlock, builder, bodyGenStatus); }; From 891b4873c129e27755e90a9b8954b9f0d0e7c5a4 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 12 Jan 2021 13:17:23 +0300 Subject: [PATCH 10/86] [llvm-readobj] - One more attempt to fix BB. Add `this->` for `W`, which is the member of `ObjDumper` An example of error: readobj/ELFDumper.cpp:738:13: error: use of undeclared identifier 'W' assert(&W.getOStream() == &llvm::fouts()); --- llvm/tools/llvm-readobj/ELFDumper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index d18e1d416278a7..5e27a14d04750a 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -735,7 +735,7 @@ template class GNUELFDumper : public ELFDumper { GNUELFDumper(const object::ELFObjectFile &ObjF, ScopedPrinter &Writer) : ELFDumper(ObjF, Writer), OS(static_cast(Writer.getOStream())) { - assert(&W.getOStream() == &llvm::fouts()); + assert(&this->W.getOStream() == &llvm::fouts()); } void printFileHeaders() override; From c1e08f0073e35cf17c0a0343cf7efff914dbd66d Mon Sep 17 00:00:00 2001 From: Mikhail Maltsev Date: Tue, 12 Jan 2021 10:22:35 +0000 Subject: [PATCH 
11/86] [clang][AST] Get rid of an alignment hack in DeclObjC.h [NFCI] This code currently uses a union object to increase the alignment of the type ObjCTypeParamList. The original intent of this trick was to be able to use the expression `this + 1` to access the beginning of a tail-allocated array of `ObjCTypeParamDecl *` pointers. The code has since been refactored and uses `llvm::TrailingObjects` to manage the tail-allocated array. This template takes care of alignment, so the hack is no longer necessary. This patch removes the union so that the `SourceRange` class can be used directly instead of being re-implemented with raw representations of source locations. Reviewed By: aprantl Differential Revision: https://reviews.llvm.org/D94224 --- clang/include/clang/AST/DeclObjC.h | 30 +++++------------------------- clang/lib/AST/DeclObjC.cpp | 4 +--- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/clang/include/clang/AST/DeclObjC.h b/clang/include/clang/AST/DeclObjC.h index 88cedbd91b6d48..b1bce069920c63 100644 --- a/clang/include/clang/AST/DeclObjC.h +++ b/clang/include/clang/AST/DeclObjC.h @@ -656,20 +656,8 @@ class ObjCTypeParamDecl : public TypedefNameDecl { /// \endcode class ObjCTypeParamList final : private llvm::TrailingObjects { - /// Stores the components of a SourceRange as a POD. - struct PODSourceRange { - unsigned Begin; - unsigned End; - }; - - union { - /// Location of the left and right angle brackets. - PODSourceRange Brackets; - - // Used only for alignment. - ObjCTypeParamDecl *AlignmentHack; - }; - + /// Location of the left and right angle brackets. + SourceRange Brackets; /// The number of parameters in the list, which are tail-allocated. 
unsigned NumParams; @@ -717,17 +705,9 @@ class ObjCTypeParamList final return *(end() - 1); } - SourceLocation getLAngleLoc() const { - return SourceLocation::getFromRawEncoding(Brackets.Begin); - } - - SourceLocation getRAngleLoc() const { - return SourceLocation::getFromRawEncoding(Brackets.End); - } - - SourceRange getSourceRange() const { - return SourceRange(getLAngleLoc(), getRAngleLoc()); - } + SourceLocation getLAngleLoc() const { return Brackets.getBegin(); } + SourceLocation getRAngleLoc() const { return Brackets.getEnd(); } + SourceRange getSourceRange() const { return Brackets; } /// Gather the default set of type arguments to be substituted for /// these type parameters when dealing with an unspecialized type. diff --git a/clang/lib/AST/DeclObjC.cpp b/clang/lib/AST/DeclObjC.cpp index 961230fb54cefa..5f82fcec90e3bb 100644 --- a/clang/lib/AST/DeclObjC.cpp +++ b/clang/lib/AST/DeclObjC.cpp @@ -1461,9 +1461,7 @@ SourceRange ObjCTypeParamDecl::getSourceRange() const { ObjCTypeParamList::ObjCTypeParamList(SourceLocation lAngleLoc, ArrayRef typeParams, SourceLocation rAngleLoc) - : NumParams(typeParams.size()) { - Brackets.Begin = lAngleLoc.getRawEncoding(); - Brackets.End = rAngleLoc.getRawEncoding(); + : Brackets(lAngleLoc, rAngleLoc), NumParams(typeParams.size()) { std::copy(typeParams.begin(), typeParams.end(), begin()); } From f264f9ad7df538357dfc8c5f318c5c8b0df3d99f Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 8 Jan 2021 13:40:29 +0000 Subject: [PATCH 12/86] [SlotIndexes] Fix and simplify basic block splitting Remove the InsertionPoint argument from SlotIndexes::insertMBBInMaps because it was confusing: what does it mean to insert a new block between two instructions, in the middle of an existing block? Instead, support the case that MachineBasicBlock::splitAt really needs, where the new block contains some instructions that are already in the maps because they have been moved there from the tail of the previous block. 
In all other use cases the new block is empty. Based on work by Carl Ritson! Differential Revision: https://reviews.llvm.org/D94311 --- llvm/include/llvm/CodeGen/LiveIntervals.h | 5 +-- llvm/include/llvm/CodeGen/SlotIndexes.h | 45 +++++++++-------------- llvm/lib/CodeGen/MachineBasicBlock.cpp | 2 +- llvm/unittests/MI/LiveIntervalTest.cpp | 41 +++++++++++++++++++++ 4 files changed, 61 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index 1a6b59a8959e20..fa08166791b06b 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -256,9 +256,8 @@ class VirtRegMap; return Indexes->getMBBFromIndex(index); } - void insertMBBInMaps(MachineBasicBlock *MBB, - MachineInstr *InsertionPoint = nullptr) { - Indexes->insertMBBInMaps(MBB, InsertionPoint); + void insertMBBInMaps(MachineBasicBlock *MBB) { + Indexes->insertMBBInMaps(MBB); assert(unsigned(MBB->getNumber()) == RegMaskBlocks.size() && "Blocks must be added in order."); RegMaskBlocks.push_back(std::make_pair(RegMaskSlots.size(), 0)); diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h index 19eab7ae5e35cf..b2133de93ea27a 100644 --- a/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -604,38 +604,27 @@ class raw_ostream; } /// Add the given MachineBasicBlock into the maps. - /// If \p InsertionPoint is specified then the block will be placed - /// before the given machine instr, otherwise it will be placed - /// before the next block in MachineFunction insertion order. 
- void insertMBBInMaps(MachineBasicBlock *mbb, - MachineInstr *InsertionPoint = nullptr) { - MachineFunction::iterator nextMBB = - std::next(MachineFunction::iterator(mbb)); - - IndexListEntry *startEntry = nullptr; - IndexListEntry *endEntry = nullptr; - IndexList::iterator newItr; - if (InsertionPoint) { - startEntry = createEntry(nullptr, 0); - endEntry = getInstructionIndex(*InsertionPoint).listEntry(); - newItr = indexList.insert(endEntry->getIterator(), startEntry); - } else if (nextMBB == mbb->getParent()->end()) { - startEntry = &indexList.back(); - endEntry = createEntry(nullptr, 0); - newItr = indexList.insertAfter(startEntry->getIterator(), endEntry); - } else { - startEntry = createEntry(nullptr, 0); - endEntry = getMBBStartIdx(&*nextMBB).listEntry(); - newItr = indexList.insert(endEntry->getIterator(), startEntry); - } + /// If it contains any instructions then they must already be in the maps. + /// This is used after a block has been split by moving some suffix of its + /// instructions into a newly created block. + void insertMBBInMaps(MachineBasicBlock *mbb) { + assert(mbb != &mbb->getParent()->front() && + "Can't insert a new block at the beginning of a function."); + auto prevMBB = std::prev(MachineFunction::iterator(mbb)); + + // Create a new entry to be used for the start of mbb and the end of + // prevMBB. + IndexListEntry *startEntry = createEntry(nullptr, 0); + IndexListEntry *endEntry = getMBBEndIdx(&*prevMBB).listEntry(); + IndexListEntry *insEntry = + mbb->empty() ? 
endEntry + : getInstructionIndex(mbb->front()).listEntry(); + IndexList::iterator newItr = + indexList.insert(insEntry->getIterator(), startEntry); SlotIndex startIdx(startEntry, SlotIndex::Slot_Block); SlotIndex endIdx(endEntry, SlotIndex::Slot_Block); - MachineFunction::iterator prevMBB(mbb); - assert(prevMBB != mbb->getParent()->end() && - "Can't insert a new block at the beginning of a function."); - --prevMBB; MBBRanges[prevMBB->getNumber()].second = startIdx; assert(unsigned(mbb->getNumber()) == MBBRanges.size() && diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index c7b404e075e102..fded4b15e67b5d 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -980,7 +980,7 @@ MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI, addLiveIns(*SplitBB, LiveRegs); if (LIS) - LIS->insertMBBInMaps(SplitBB, &MI); + LIS->insertMBBInMaps(SplitBB); return SplitBB; } diff --git a/llvm/unittests/MI/LiveIntervalTest.cpp b/llvm/unittests/MI/LiveIntervalTest.cpp index 3971d86e82d370..d367ee4676a166 100644 --- a/llvm/unittests/MI/LiveIntervalTest.cpp +++ b/llvm/unittests/MI/LiveIntervalTest.cpp @@ -149,6 +149,19 @@ static void testHandleMoveIntoNewBundle(MachineFunction &MF, LiveIntervals &LIS, LIS.handleMoveIntoNewBundle(*BundleStart, true); } +/** + * Split block numbered \p BlockNum at instruction \p SplitAt using + * MachineBasicBlock::splitAt updating liveness intervals. 
+ */ +static void testSplitAt(MachineFunction &MF, LiveIntervals &LIS, + unsigned SplitAt, unsigned BlockNum) { + MachineInstr &SplitInstr = getMI(MF, SplitAt, BlockNum); + MachineBasicBlock &MBB = *SplitInstr.getParent(); + + // Split block and update live intervals + MBB.splitAt(SplitInstr, false, &LIS); +} + static void liveIntervalTest(StringRef MIRFunc, LiveIntervalTest T) { LLVMContext Context; std::unique_ptr TM = createTargetMachine(); @@ -608,6 +621,34 @@ TEST(LiveIntervalTest, BundleSubRegDef) { }); } +TEST(LiveIntervalTest, SplitAtOneInstruction) { + liveIntervalTest(R"MIR( + successors: %bb.1 + %0 = IMPLICIT_DEF + S_BRANCH %bb.1 + bb.1: + S_NOP 0 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testSplitAt(MF, LIS, 1, 0); + }); +} + +TEST(LiveIntervalTest, SplitAtMultiInstruction) { + liveIntervalTest(R"MIR( + successors: %bb.1 + %0 = IMPLICIT_DEF + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_BRANCH %bb.1 + bb.1: + S_NOP 0 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testSplitAt(MF, LIS, 0, 0); + }); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); initLLVM(); From 794e3d94d5a97e118bc2ed10f7ba1830dbb25459 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 26 Aug 2020 14:08:14 +0100 Subject: [PATCH 13/86] [AMDGPU][GlobalISel] Remove some duplicate RUN lines Differential Revision: https://reviews.llvm.org/D86618 --- .../GlobalISel/legalize-load-constant.mir | 5111 ----------------- .../AMDGPU/GlobalISel/legalize-load-flat.mir | 4359 -------------- 2 files changed, 9470 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 60152da35f6e01..5f8792734c595f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -2,8 +2,6 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 
%s -o - | FileCheck -check-prefix=CI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI-MESA %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9-MESA %s --- name: test_load_constant_s1_align1 @@ -32,20 +30,6 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s1_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0 = COPY [[AND]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s1_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s1) = G_LOAD %0 :: (load 1, align 1, addrspace 4) %2:_(s32) = G_ZEXT %1 @@ -79,20 +63,6 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s2_align1 - ; CI-MESA: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0 = COPY [[AND]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s2_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0 = COPY [[AND]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s2) = G_LOAD %0 :: (load 1, align 1, addrspace 4) %2:_(s32) = G_ZEXT %1 @@ -120,16 +90,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s8_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s8) = G_LOAD %0 :: (load 1, align 4, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -157,16 +117,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) 
- ; CI-MESA-LABEL: name: test_load_constant_s8_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s8_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s8) = G_LOAD %0 :: (load 1, align 1, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -194,16 +144,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s16_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s16_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s16) = G_LOAD %0 :: (load 2, align 4, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -231,16 +171,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s16_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s16_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -303,40 +233,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s16_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s16_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s16) = G_LOAD %0 :: (load 2, align 1, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -361,14 +257,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s32_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s32_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -425,36 +313,6 @@ body: | ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: $vgpr0 = COPY [[OR]](s32) - ; CI-MESA-LABEL: name: 
test_load_constant_s32_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: $vgpr0 = COPY [[OR]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s32_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: $vgpr0 = COPY [[OR]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 4) $vgpr0 = COPY %1 @@ -559,68 +417,6 @@ body: 
| ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: $vgpr0 = COPY [[OR2]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s32_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: $vgpr0 = COPY [[OR2]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s32_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 4) $vgpr0 = COPY %1 @@ -647,16 +443,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s24_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s24_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -684,16 +470,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s24_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s24_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY 
[[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -739,28 +515,6 @@ body: | ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16 ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s24_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16 - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s24_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16 - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -806,28 
+560,6 @@ body: | ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16 ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_constant_s24_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16 - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_s24_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16 - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -861,20 +593,6 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) - ; 
CI-MESA-LABEL: name: test_load_constant_s48_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) - ; GFX9-MESA-LABEL: name: test_load_constant_s48_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 4) %2:_(s64) = G_ZEXT %1 @@ -899,14 +617,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; CI-MESA-LABEL: name: test_load_constant_s64_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX9-MESA-LABEL: name: test_load_constant_s64_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -930,14 +640,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; 
CI-MESA-LABEL: name: test_load_constant_s64_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX9-MESA-LABEL: name: test_load_constant_s64_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -1033,62 +735,6 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CI-MESA-LABEL: name: test_load_constant_s64_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; GFX9-MESA-LABEL: name: test_load_constant_s64_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -1291,136 +937,6 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CI-MESA-LABEL: name: test_load_constant_s64_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 
- ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) 
= COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; GFX9-MESA-LABEL: name: test_load_constant_s64_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: 
[[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -1447,16 +963,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: 
test_load_constant_s96_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_constant_s96_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1483,16 +989,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 8, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_constant_s96_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 8, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_constant_s96_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 8, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1519,16 +1015,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = 
G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_constant_s96_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_constant_s96_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1666,90 +1152,6 @@ body: | ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64 ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_constant_s96_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[AND]], [[SHL]] - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 + 8, addrspace 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 + 10, addrspace 4) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY 
[[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_constant_s96_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD3]](p4) :: (load 2 + 8, addrspace 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 + 10, addrspace 4) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -2007,170 +1409,6 @@ body: | ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_constant_s96_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: 
[[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], 
[[C3]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_constant_s96_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = 
COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY 
[[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -2215,28 +1453,6 @@ body: | ; GFX9: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128 ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>) ; GFX9: S_NOP 0, implicit [[BITCAST]](s160) - ; CI-MESA-LABEL: name: test_load_constant_s160_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 + 16, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>) - ; CI-MESA: S_NOP 0, implicit [[BITCAST]](s160) 
- ; GFX9-MESA-LABEL: name: test_load_constant_s160_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 + 16, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>) - ; GFX9-MESA: S_NOP 0, implicit [[BITCAST]](s160) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s160) = G_LOAD %0 :: (load 20, align 4, addrspace 4) S_NOP 0, implicit %1 @@ -2287,32 +1503,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256) - ; CI-MESA-LABEL: name: test_load_constant_s224_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 + 16, align 4, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>) - ; CI-MESA: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-MESA: 
[[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0 - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256) - ; GFX9-MESA-LABEL: name: test_load_constant_s224_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 + 16, align 4, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>) - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0 - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load 28, align 4, addrspace 4) %2:_(s256) = G_IMPLICIT_DEF @@ -2342,16 +1532,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; CI-MESA-LABEL: name: test_load_constant_s128_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; GFX9-MESA-LABEL: name: test_load_constant_s128_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; 
GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2378,16 +1558,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; CI-MESA-LABEL: name: test_load_constant_s128_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; GFX9-MESA-LABEL: name: test_load_constant_s128_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2705,210 +1875,6 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; CI-MESA-LABEL: name: test_load_constant_s128_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: 
[[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], 
[[C4]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; GFX9-MESA-LABEL: name: test_load_constant_s128_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; 
GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR 
[[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY 
[[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2935,16 +1901,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, align 16, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) - ; CI-MESA-LABEL: name: test_load_constant_s256_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, align 16, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) - ; GFX9-MESA-LABEL: name: test_load_constant_s256_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, align 16, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s256) = G_LOAD %0 :: (load 32, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -2968,14 +1924,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; CI-MESA-LABEL: name: test_load_constant_p1_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; GFX9-MESA-LABEL: name: 
test_load_constant_p1_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -2999,14 +1947,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; CI-MESA-LABEL: name: test_load_constant_p1_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; GFX9-MESA-LABEL: name: test_load_constant_p1_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -3209,136 +2149,6 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) - ; CI-MESA-LABEL: name: test_load_constant_p1_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: 
[[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) 
- ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) - ; GFX9-MESA-LABEL: name: test_load_constant_p1_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY 
$vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], 
[[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = 
G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -3362,14 +2172,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) - ; CI-MESA-LABEL: name: test_load_constant_p3_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](p3) - ; GFX9-MESA-LABEL: name: test_load_constant_p3_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](p3) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -3393,14 +2195,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4) - ; CI-MESA-LABEL: name: test_load_constant_p4_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) - ; GFX9-MESA-LABEL: name: test_load_constant_p4_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -3424,14 +2218,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4) - ; CI-MESA-LABEL: name: 
test_load_constant_p4_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) - ; GFX9-MESA-LABEL: name: test_load_constant_p4_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -3527,62 +2313,6 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) - ; CI-MESA-LABEL: name: test_load_constant_p4_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) - ; GFX9-MESA-LABEL: name: test_load_constant_p4_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; 
GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -3785,136 +2515,6 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) - ; CI-MESA-LABEL: name: test_load_constant_p4_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) - ; GFX9-MESA-LABEL: name: test_load_constant_p4_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: 
(load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - 
; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -3938,14 +2538,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](p5) - ; CI-MESA-LABEL: name: test_load_constant_p5_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: 
[[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](p5) - ; GFX9-MESA-LABEL: name: test_load_constant_p5_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -4005,38 +2597,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) - ; CI-MESA-LABEL: name: test_load_constant_p5_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) - ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) - ; GFX9-MESA-LABEL: name: test_load_constant_p5_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 4) $vgpr0 = COPY %1 @@ -4144,70 +2704,6 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) - ; CI-MESA-LABEL: name: test_load_constant_p5_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) - ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) - ; GFX9-MESA-LABEL: name: test_load_constant_p5_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 4) $vgpr0 = COPY %1 @@ -4278,46 +2774,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_constant_v2s8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR 
[[LOAD]], [[C2]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: 
(load 2, align 4, addrspace 4) %2:_(s16) = G_BITCAST %1 @@ -4390,46 +2846,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_constant_v2s8_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 4) %2:_(s16) = G_BITCAST %1 @@ -4493,40 +2909,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_constant_v2s8_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: 
[[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 4) %2:_(s16) = G_BITCAST %1 @@ -4558,18 +2940,6 @@ body: | ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_constant_v3s8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 - ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s8_align4 - ; GFX9-MESA: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 - ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 4) %2:_(<4 x s8>) = G_IMPLICIT_DEF @@ -4601,18 +2971,6 @@ body: | ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_constant_v3s8_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 - ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s8_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 - ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 4) %2:_(<4 x s8>) = G_IMPLICIT_DEF @@ -4676,40 +3034,6 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_constant_v4s8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY 
$vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s8_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -4789,52 +3113,6 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_constant_v4s8_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) - ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) - ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s8_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) - ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) - ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY 
$vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 4) $vgpr0 = COPY %1 @@ -4905,46 +3183,6 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_constant_v4s8_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s8_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; 
GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 1, addrspace 4) $vgpr0 = COPY %1 @@ -5041,64 +3279,6 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) - ; CI-MESA-LABEL: name: test_load_constant_v8s8_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) - ; GFX9-MESA-LABEL: name: test_load_constant_v8s8_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[LOAD]](<2 x s32>) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC 
[[CONCAT_VECTORS1]](<4 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -5253,104 +3433,6 @@ body: | ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) - ; CI-MESA-LABEL: name: test_load_constant_v16s8_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = 
G_LSHR [[UV1]], [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) - ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; CI-MESA: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) - ; GFX9-MESA-LABEL: name: test_load_constant_v16s8_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY 
[[LSHR4]](s32) - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) - ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) - ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -5621,184 +3703,6 @@ body: | ; GFX9: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>) ; GFX9: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) - ; CI-MESA-LABEL: name: test_load_constant_v32s8_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CI-MESA: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) - ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) 
= COPY [[LSHR9]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CI-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; CI-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; CI-MESA: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>) - ; CI-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CI-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; CI-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; CI-MESA: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>) - ; CI-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) - ; CI-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; CI-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; CI-MESA: 
[[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; CI-MESA: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>) - ; CI-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; CI-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) - ; CI-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) - ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; CI-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; CI-MESA: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) - ; GFX9-MESA-LABEL: name: test_load_constant_v32s8_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-MESA: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) - ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; 
GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) - ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) - ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; GFX9-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; GFX9-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; GFX9-MESA: 
[[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32) - ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x s16>) - ; GFX9-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; GFX9-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; GFX9-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) - ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x s16>) - ; GFX9-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) - ; GFX9-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; GFX9-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[COPY25]](s32), [[COPY26]](s32) - ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; GFX9-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x s16>) - ; GFX9-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; GFX9-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) - ; GFX9-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) - ; GFX9-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; GFX9-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32) - ; GFX9-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; GFX9-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -5823,14 +3727,6 @@ body: | ; GFX9: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v2s16_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s16_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -5884,32 +3780,6 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v2s16_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 
- ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s16_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 4) $vgpr0 = COPY %1 @@ -6018,71 +3888,6 @@ body: | ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v2s16_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s16_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 4) $vgpr0 = COPY %1 @@ -6118,22 +3923,6 @@ body: | ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) - ; 
CI-MESA-LABEL: name: test_load_constant_v3s16_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 4) %2:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6165,18 +3954,6 @@ body: | ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v3s16_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 4) 
- ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 - ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 4, addrspace 4) %2:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6283,66 +4060,6 @@ body: | ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v3s16_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; 
CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align2 - ; 
GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = 
G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 4) %2:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6527,121 +4244,6 @@ body: | ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v3s16_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], 
[[TRUNC1]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: 
[[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 4) %2:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6667,14 +4269,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v4s16_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s16_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -6698,14 +4292,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v4s16_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s16_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, 
addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -6797,56 +4383,6 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v4s16_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s16_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 
%1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -7038,127 +4574,6 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v4s16_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = 
COPY [[C2]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s16_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; 
GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -7182,14 +4597,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) - ; CI-MESA-LABEL: name: test_load_constant_v8s16_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) - ; GFX9-MESA-LABEL: name: test_load_constant_v8s16_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7213,14 +4620,6 @@ body: | ; GFX9: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v2s32_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s32_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -7244,14 +4643,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v2s32_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s32_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -7344,60 +4735,6 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v2s32_align2 - ; CI-MESA: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s32_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -7574,116 +4911,6 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CI-MESA-LABEL: name: 
test_load_constant_v2s32_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: 
[[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s32_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], 
[[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -7707,14 +4934,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v3s32_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: 
[[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s32_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -7740,14 +4959,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v3s32_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s32_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -7771,14 +4982,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v4s32_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-MESA-LABEL: name: 
test_load_constant_v4s32_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7802,14 +5005,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v4s32_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s32_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7833,14 +5028,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v4s32_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s32_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD 
[[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7864,14 +5051,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v8s32_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v8s32_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -7895,14 +5074,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v16s32_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v16s32_align32 - ; GFX9-MESA: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 64, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -7926,14 +5097,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) - ; CI-MESA-LABEL: name: test_load_constant_v16s32_align32_extload_from_16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) - ; GFX9-MESA-LABEL: name: test_load_constant_v16s32_align32_extload_from_16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 16, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -7957,14 +5120,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[LOAD]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v2s64_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s64_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7988,14 +5143,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v2s64_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s64_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8019,14 +5166,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v2s64_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: 
[[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s64_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8191,108 +5330,6 @@ body: | ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v2s64_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 + 8, addrspace 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 + 10, addrspace 4) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 + 12, addrspace 4) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 + 14, addrspace 4) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s64_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; 
GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 + 8, addrspace 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 + 10, addrspace 4) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 + 12, addrspace 4) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 + 14, addrspace 4) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p4) 
= COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 2, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8656,246 +5693,6 @@ body: | ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v2s64_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: 
[[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; CI-MESA: 
[[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] - ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] - ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC14]], [[C7]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s64_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; 
GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: 
[[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR 
[[AND12]], [[SHL8]] - ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8928,20 +5725,6 @@ body: | ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v3s64_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], 
[[EXTRACT]](<3 x s64>), 0 - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s64_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -8991,30 +5774,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v3s64_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load 8 + 16, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128 - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; GFX9-MESA-LABEL: name: 
test_load_constant_v3s64_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load 8 + 16, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128 - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 8, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -9553,364 +6312,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v3s64_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: 
[[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND 
[[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: 
(load 1 + 8, addrspace 4) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] - ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY 
[[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] - ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), 
[[MV1]](s64) - ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 + 16, addrspace 4) - ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 + 17, addrspace 4) - ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 + 18, addrspace 4) - ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 + 19, addrspace 4) - ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 + 20, addrspace 4) - ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 + 21, addrspace 4) - ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 + 22, addrspace 4) - ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 + 23, addrspace 4) - ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) - ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) - ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] - ; 
CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) - ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] - ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) - ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] - ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) - ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] - ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) - ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] - ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) - ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] - ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) - ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) - ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT11]], [[C10]](s32) - ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128 - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v3s64_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; 
GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], 
[[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; GFX9-MESA: 
[[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 + 16, addrspace 4) - ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 + 17, addrspace 4) - ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 + 18, addrspace 4) - ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 + 19, addrspace 4) - ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 + 20, addrspace 4) - ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 + 21, addrspace 4) - ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 + 22, addrspace 4) - ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[PTR_ADD15]], [[C6]](s64) - ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 + 23, addrspace 4) - ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) - ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] - ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) - ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] - ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) - ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] - ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) - ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] - ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) - ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] - ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) - ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] - ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) - ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] - ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) - ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] - ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) - ; GFX9-MESA: 
[[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) - ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128 - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 1, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -9936,14 +6337,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v4s64_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s64_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -9967,14 +6360,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, align 8, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v4s64_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, align 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s64_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, align 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -10654,462 +7039,6 @@ body: | ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_constant_v4s64_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) 
- ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: 
[[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] - ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC10]], [[C7]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] - ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 + 16, addrspace 4) - ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 + 17, addrspace 4) - ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 + 18, addrspace 4) - ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 + 19, addrspace 4) - ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 + 20, addrspace 4) - ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 + 21, addrspace 4) - ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 + 22, addrspace 4) - ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 + 23, addrspace 4) - ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) - ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) - ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = 
G_OR [[AND16]], [[TRUNC17]] - ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) - ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] - ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) - ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] - ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) - ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] - ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) - ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] - ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) - ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] - ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) - ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) - ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; CI-MESA: 
[[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) - ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; CI-MESA: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-MESA: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; CI-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load 1 + 24, addrspace 4) - ; CI-MESA: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; CI-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load 1 + 25, addrspace 4) - ; CI-MESA: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; CI-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load 1 + 26, addrspace 4) - ; CI-MESA: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; CI-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load 1 + 27, addrspace 4) - ; CI-MESA: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; CI-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load 1 + 28, addrspace 4) - ; CI-MESA: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; CI-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load 1 + 29, addrspace 4) - ; CI-MESA: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; CI-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load 1 + 30, addrspace 4) - ; CI-MESA: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; CI-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load 1 + 31, addrspace 4) - ; CI-MESA: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) - ; CI-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] - ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD25]](s32) - ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]] - ; CI-MESA: 
[[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) - ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) - ; CI-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] - ; CI-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) - ; CI-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] - ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD27]](s32) - ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]] - ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) - ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) - ; CI-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] - ; CI-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) - ; CI-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] - ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LOAD29]](s32) - ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) - ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) - ; CI-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] - ; CI-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) - ; CI-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] - ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LOAD31]](s32) - ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) - ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) - ; CI-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] - ; CI-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) - ; CI-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) - ; CI-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32) - ; CI-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT12]], [[SHL22]] - ; CI-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) - ; CI-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) - ; CI-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32) - ; CI-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] - ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s64_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: 
[[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL 
[[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; GFX9-MESA: 
[[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 + 16, addrspace 4) - ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 + 17, addrspace 4) - ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 + 18, addrspace 4) - ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 + 19, addrspace 4) - ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 + 20, addrspace 4) - ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 + 21, addrspace 4) - ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 + 22, addrspace 4) - ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 + 23, addrspace 4) - ; GFX9-MESA: 
[[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) - ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] - ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) - ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] - ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) - ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] - ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) - ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] - ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) - ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] - ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) - ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] - ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) - ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] - ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) - ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] - ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) - ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR15]](s16) - ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) - ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; GFX9-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; GFX9-MESA: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; GFX9-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load 1 + 24, addrspace 4) - ; GFX9-MESA: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; GFX9-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load 1 + 25, addrspace 4) - ; GFX9-MESA: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; GFX9-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load 1 + 26, addrspace 4) - ; GFX9-MESA: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; GFX9-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load 1 + 27, addrspace 4) - ; GFX9-MESA: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; GFX9-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load 1 + 28, addrspace 4) - ; GFX9-MESA: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; GFX9-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load 1 + 29, addrspace 4) - ; GFX9-MESA: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; GFX9-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load 1 + 30, addrspace 4) - ; GFX9-MESA: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; GFX9-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load 1 + 31, addrspace 4) - ; GFX9-MESA: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) - ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] - ; GFX9-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) - ; GFX9-MESA: [[AND25:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC25]], [[C7]] - ; GFX9-MESA: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; GFX9-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] - ; GFX9-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) - ; GFX9-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] - ; GFX9-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) - ; GFX9-MESA: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; GFX9-MESA: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; GFX9-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] - ; GFX9-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) - ; GFX9-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] - ; GFX9-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) - ; GFX9-MESA: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; GFX9-MESA: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; GFX9-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] - ; GFX9-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) - ; GFX9-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] - ; GFX9-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) - ; GFX9-MESA: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; GFX9-MESA: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; GFX9-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] - ; GFX9-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) - ; GFX9-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) - ; GFX9-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) - ; GFX9-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] - ; GFX9-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) - ; GFX9-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) - ; GFX9-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) - ; GFX9-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] - ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), 
[[OR23]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 1, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -11136,16 +7065,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) - ; CI-MESA-LABEL: name: test_load_constant_v2s128_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2s128_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -11172,16 +7091,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; CI-MESA-LABEL: name: test_load_constant_v2p1_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2p1_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11208,16 +7117,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; CI-MESA-LABEL: name: test_load_constant_v2p1_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2p1_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11244,16 +7143,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; CI-MESA-LABEL: name: test_load_constant_v2p1_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2p1_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11571,210 +7460,6 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; CI-MESA-LABEL: name: test_load_constant_v2p1_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] 
- ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - 
; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; CI-MESA: 
[[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2p1_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR 
[[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; 
GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 4) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 4) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 4) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 4) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 4) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: 
[[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 4) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 4) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 4) - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 1, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11798,14 +7483,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - 
; CI-MESA-LABEL: name: test_load_constant_v2p3_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2p3_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -11829,14 +7506,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; CI-MESA-LABEL: name: test_load_constant_v2p3_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2p3_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12019,120 +7688,6 @@ body: | ; GFX9: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) - ; CI-MESA-LABEL: name: test_load_constant_v2p3_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; 
CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; CI-MESA: 
[[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) - ; GFX9-MESA-LABEL: name: test_load_constant_v2p3_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) 
- ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 4) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 4) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 4) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12156,14 +7711,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) - ; CI-MESA-LABEL: name: test_ext_load_constant_s32_from_1_align4 - ; CI-MESA: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) - ; GFX9-MESA-LABEL: name: test_ext_load_constant_s32_from_1_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -12187,14 +7734,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) - ; CI-MESA-LABEL: name: test_ext_load_constant_s32_from_2_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) - ; GFX9-MESA-LABEL: name: test_ext_load_constant_s32_from_2_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 2, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -12222,16 +7761,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align4 - ; 
GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12258,16 +7787,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12294,16 +7813,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_4_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_4_align4 - ; GFX9-MESA: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12339,22 +7848,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) - ; CI-MESA-LABEL: name: test_ext_load_constant_s128_from_4_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) - ; GFX9-MESA-LABEL: name: test_ext_load_constant_s128_from_4_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -12381,16 +7874,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; 
CI-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12417,16 +7900,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12450,14 +7923,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 1, addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; 
CI-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 1, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 1, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -12481,14 +7946,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 2, addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 2, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 2, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -12512,14 +7969,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 1) - ; 
CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -12543,14 +7992,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; CI-MESA-LABEL: name: test_extload_constant_v3s32_from_6_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_constant_v3s32_from_6_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load 6, align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -12574,14 +8015,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-MESA-LABEL: name: test_extload_constant_v4s32_from_8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_constant_v4s32_from_8_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY 
$vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -13064,320 +8497,6 @@ body: | ; GFX9: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96) - ; CI-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 1) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 1) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: 
[[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 1) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 1) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 1) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 1) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = 
G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 1) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 1) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 1) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 1) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>) - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT 
[[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 1) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 1) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 1) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 1) - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD15]](p4) :: (load 1 + 16, addrspace 1) - ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 + 17, addrspace 1) - ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 + 18, addrspace 1) - ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 + 19, addrspace 1) - ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) - ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] - ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) - ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32) - ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]] - ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32) - ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]] - ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32) - ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) - ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] - ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) - ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32) - ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 + 20, addrspace 1) - ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 + 21, addrspace 1) - ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], 
[[C1]](s64) - ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 + 22, addrspace 1) - ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 + 23, addrspace 1) - ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) - ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) - ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]] - ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32) - ; CI-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]] - ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32) - ; CI-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] - ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32) - ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] - ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) - ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]] - ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32) - ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64 - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>) - ; CI-MESA: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96) - ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96) - ; GFX9-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, addrspace 1) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 1) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 + 4, addrspace 1) - ; GFX9-MESA: 
[[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 + 5, addrspace 1) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 + 6, addrspace 1) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 + 7, addrspace 1) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 + 8, addrspace 1) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 + 9, addrspace 1) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; 
GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 + 10, addrspace 1) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 + 11, addrspace 1) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>) - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 + 12, addrspace 1) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 + 13, addrspace 1) 
- ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 + 14, addrspace 1) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 + 15, addrspace 1) - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 + 16, addrspace 1) - ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 + 17, addrspace 1) - ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 + 18, addrspace 1) - ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 + 19, 
addrspace 1) - ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) - ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] - ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) - ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32) - ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]] - ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32) - ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]] - ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32) - ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) - ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] - ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) - ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32) - ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 + 20, addrspace 1) - ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 + 21, addrspace 1) - ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 + 22, addrspace 1) - ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 + 23, addrspace 1) - ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) - ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) - ; GFX9-MESA: 
[[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]] - ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32) - ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]] - ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32) - ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] - ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32) - ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] - ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) - ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]] - ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32) - ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0 - ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64 - ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>) - ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96) - ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -13635,168 +8754,6 @@ body: | ; GFX9: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96) - ; CI-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 1) - ; CI-MESA: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 1) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 1) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 + 8, addrspace 1) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 + 10, addrspace 1) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; CI-MESA: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>) - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 + 12, addrspace 1) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 + 14, addrspace 1) - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 2 + 16, addrspace 1) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 2 + 18, addrspace 1) - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; 
CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 2 + 20, addrspace 1) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 2 + 22, addrspace 1) - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64 - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>) - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96) - ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96) - ; GFX9-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 1) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 + 6, addrspace 1) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 + 8, addrspace 1) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 + 10, addrspace 1) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF 
- ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>) - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 + 12, addrspace 1) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 + 14, addrspace 1) - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 2 + 16, addrspace 1) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 2 + 18, addrspace 1) - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32) - ; GFX9-MESA: 
[[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 2 + 20, addrspace 1) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 2 + 22, addrspace 1) - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0 - ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64 - ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>) - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96) - ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -13847,30 +8804,6 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 1) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: 
[[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 + 12, align 4, addrspace 1) - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 1) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 + 12, align 4, addrspace 1) - ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -13921,30 +8854,6 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 1) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - 
; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 + 12, align 4, addrspace 1) - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 1) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 + 12, align 4, addrspace 1) - ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 16, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -13974,16 +8883,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) - ; CI-MESA-LABEL: name: test_load_constant_s512_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s512) = 
G_BITCAST [[LOAD]](<16 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) - ; GFX9-MESA-LABEL: name: test_load_constant_s512_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s512) = G_LOAD %0 :: (load 64, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -14010,16 +8909,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) - ; CI-MESA-LABEL: name: test_load_constant_v4s128_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) - ; GFX9-MESA-LABEL: name: test_load_constant_v4s128_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) - ; GFX9-MESA: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s128>) = G_LOAD %0 :: (load 64, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index fcf4f91fd4f735..44cf01a5472b53 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -2,8 +2,6 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI-MESA %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9-MESA %s --- name: test_load_flat_s1_align1 @@ -32,20 +30,6 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s1_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0 = COPY [[AND]](s32) - ; GFX9-MESA-LABEL: name: 
test_load_flat_s1_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0 = COPY [[AND]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s1) = G_LOAD %0 :: (load 1, align 1, addrspace 0) %2:_(s32) = G_ZEXT %1 @@ -79,20 +63,6 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s2_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0 = COPY [[AND]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s2_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0 = COPY [[AND]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s2) = G_LOAD %0 :: (load 1, align 1, addrspace 0) %2:_(s32) = G_ZEXT %1 @@ -120,16 +90,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; 
CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s8_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s8) = G_LOAD %0 :: (load 1, align 4, addrspace 0) %2:_(s32) = G_ANYEXT %1 @@ -157,16 +117,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s8_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s8_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s8) = G_LOAD %0 :: (load 1, align 1, addrspace 0) %2:_(s32) = G_ANYEXT %1 @@ -194,16 +144,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s16_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s16_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s16) = G_LOAD %0 :: (load 2, align 4, addrspace 0) %2:_(s32) = G_ANYEXT %1 @@ -231,16 +171,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s16_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s16_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 0) %2:_(s32) = G_ANYEXT %1 @@ -303,40 +233,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s16_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s16_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s16) = G_LOAD %0 :: (load 2, align 1, addrspace 0) %2:_(s32) = G_ANYEXT %1 @@ -361,14 +257,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s32_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s32_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0 = COPY %1 @@ -425,36 +313,6 @@ body: | ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: $vgpr0 = COPY [[OR]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s32_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: $vgpr0 = COPY [[OR]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s32_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; 
GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: $vgpr0 = COPY [[OR]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 0) $vgpr0 = COPY %1 @@ -559,68 +417,6 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: $vgpr0 = COPY [[OR2]](s32) - ; CI-MESA-LABEL: name: test_load_flat_s32_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) 
= G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: $vgpr0 = COPY [[OR2]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_s32_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], 
[[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 0) $vgpr0 = COPY %1 @@ -653,20 +449,6 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) - ; CI-MESA-LABEL: name: test_load_flat_s48_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) - ; GFX9-MESA-LABEL: name: test_load_flat_s48_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 0) %2:_(s64) = G_ZEXT %1 @@ -691,14 +473,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; CI-MESA-LABEL: name: test_load_flat_s64_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD 
[[COPY]](p0) :: (load 8) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX9-MESA-LABEL: name: test_load_flat_s64_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -722,14 +496,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; CI-MESA-LABEL: name: test_load_flat_s64_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX9-MESA-LABEL: name: test_load_flat_s64_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -825,62 +591,6 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CI-MESA-LABEL: name: test_load_flat_s64_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; CI-MESA: 
[[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; GFX9-MESA-LABEL: name: test_load_flat_s64_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - 
; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -1083,136 +793,6 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CI-MESA-LABEL: name: test_load_flat_s64_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: 
[[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], 
[[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; GFX9-MESA-LABEL: name: test_load_flat_s64_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) 
= G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -1239,16 +819,6 @@ body: | ; GFX9: 
[[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_flat_s96_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_flat_s96_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1275,16 +845,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 8) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_flat_s96_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 8) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_flat_s96_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 8) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1311,16 +871,6 @@ body: | ; 
GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_flat_s96_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_flat_s96_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1458,90 +1008,6 @@ body: | ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64 ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_flat_s96_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 + 8) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 + 10) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = 
COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_flat_s96_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 + 8) - ; 
GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 + 10) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1799,170 +1265,6 @@ body: | ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; CI-MESA-LABEL: name: test_load_flat_s96_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; 
CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: 
[[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) 
= G_AND [[COPY11]], [[C3]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) - ; GFX9-MESA-LABEL: name: test_load_flat_s96_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = 
G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], 
[[C6]](s32) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -2007,28 +1309,6 @@ body: | ; GFX9: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128 ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>) ; GFX9: S_NOP 0, implicit [[BITCAST]](s160) - ; CI-MESA-LABEL: name: test_load_flat_s160_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4 + 16) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>) - ; CI-MESA: S_NOP 0, implicit [[BITCAST]](s160) - ; GFX9-MESA-LABEL: name: test_load_flat_s160_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4 + 16) - ; GFX9-MESA: 
[[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>) - ; GFX9-MESA: S_NOP 0, implicit [[BITCAST]](s160) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s160) = G_LOAD %0 :: (load 20, align 4, addrspace 0) S_NOP 0, implicit %1 @@ -2079,32 +1359,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256) - ; CI-MESA-LABEL: name: test_load_flat_s224_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 12 + 16, align 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128 - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>) - ; CI-MESA: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0 - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256) - ; GFX9-MESA-LABEL: name: test_load_flat_s224_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; 
GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 12 + 16, align 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128 - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>) - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0 - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load 28, align 4, addrspace 0) %2:_(s256) = G_IMPLICIT_DEF @@ -2134,16 +1388,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; CI-MESA-LABEL: name: test_load_flat_s128_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; GFX9-MESA-LABEL: name: test_load_flat_s128_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2170,16 +1414,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BITCAST]](s128) - ; CI-MESA-LABEL: name: test_load_flat_s128_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; GFX9-MESA-LABEL: name: test_load_flat_s128_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2497,210 +1731,6 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) - ; CI-MESA-LABEL: name: test_load_flat_s128_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY 
[[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: 
[[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - 
; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) 
- ; GFX9-MESA-LABEL: name: test_load_flat_s128_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], 
[[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9-MESA: 
[[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2739,24 +1769,6 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) - ; CI-MESA-LABEL: name: test_load_flat_s256_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x 
s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) - ; GFX9-MESA-LABEL: name: test_load_flat_s256_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s256) = G_LOAD %0 :: (load 32, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -2780,14 +1792,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; CI-MESA-LABEL: name: test_load_flat_p1_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; GFX9-MESA-LABEL: name: test_load_flat_p1_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -2811,14 +1815,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8, align 4) ; GFX9: 
$vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; CI-MESA-LABEL: name: test_load_flat_p1_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; GFX9-MESA-LABEL: name: test_load_flat_p1_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3021,136 +2017,6 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) - ; CI-MESA-LABEL: name: test_load_flat_p1_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; 
CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) - ; GFX9-MESA-LABEL: name: test_load_flat_p1_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; 
GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3174,14 +2040,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load 4) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) - ; CI-MESA-LABEL: name: test_load_flat_p3_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](p3) - ; GFX9-MESA-LABEL: name: test_load_flat_p3_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p3) = 
G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](p3) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0 = COPY %1 @@ -3205,14 +2063,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4) - ; CI-MESA-LABEL: name: test_load_flat_p4_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) - ; GFX9-MESA-LABEL: name: test_load_flat_p4_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3236,14 +2086,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8, align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4) - ; CI-MESA-LABEL: name: test_load_flat_p4_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) - ; GFX9-MESA-LABEL: name: test_load_flat_p4_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3339,62 +2181,6 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) - ; CI-MESA-LABEL: name: test_load_flat_p4_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY 
$vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) - ; GFX9-MESA-LABEL: name: test_load_flat_p4_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; 
GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3597,136 +2383,6 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) - ; CI-MESA-LABEL: name: test_load_flat_p4_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 
1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; 
CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) 
- ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) - ; GFX9-MESA-LABEL: name: test_load_flat_p4_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; 
GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3750,14 +2406,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load 4) ; GFX9: $vgpr0 = COPY [[LOAD]](p5) - ; CI-MESA-LABEL: name: test_load_flat_p5_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](p5) - ; GFX9-MESA-LABEL: name: test_load_flat_p5_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0 = COPY %1 @@ -3817,38 +2465,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) - ; CI-MESA-LABEL: name: test_load_flat_p5_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) - ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) - ; GFX9-MESA-LABEL: name: test_load_flat_p5_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 0) $vgpr0 = COPY %1 @@ -3956,70 +2572,6 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) - ; CI-MESA-LABEL: name: test_load_flat_p5_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: 
(load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) - ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) - ; GFX9-MESA-LABEL: name: test_load_flat_p5_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 0) $vgpr0 = COPY %1 @@ -4090,46 +2642,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: 
name: test_load_flat_v2s8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC]], [[C3]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 4, addrspace 0) %2:_(s16) = G_BITCAST %1 @@ -4202,46 +2714,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_flat_v2s8_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - 
; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 0) %2:_(s16) = G_BITCAST %1 @@ -4305,40 +2777,6 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-MESA-LABEL: name: test_load_flat_v2s8_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: 
[[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 0) %2:_(s16) = G_BITCAST %1 @@ -4370,18 +2808,6 @@ body: | ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_flat_v3s8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = 
COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 3, align 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 - ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s8_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 3, align 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 - ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 0) %2:_(<4 x s8>) = G_IMPLICIT_DEF @@ -4413,18 +2839,6 @@ body: | ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_flat_v3s8_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 - ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s8_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 - ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 0) %2:_(<4 x s8>) = G_IMPLICIT_DEF @@ -4488,40 +2902,6 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x 
s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_flat_v4s8_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s8_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0 = COPY %1 @@ -4601,52 +2981,6 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_flat_v4s8_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) - ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) - ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; 
CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s8_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) - ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) - ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 
x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 0) $vgpr0 = COPY %1 @@ -4717,46 +3051,6 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; CI-MESA-LABEL: name: test_load_flat_v4s8_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s8_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 1, addrspace 0) $vgpr0 = COPY %1 @@ -4853,64 +3147,6 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) - ; CI-MESA-LABEL: name: test_load_flat_v8s8_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD 
[[COPY]](p0) :: (load 8) - ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) - ; GFX9-MESA-LABEL: name: test_load_flat_v8s8_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-MESA: 
[[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -5065,104 +3301,6 @@ body: | ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) - ; CI-MESA-LABEL: name: test_load_flat_v16s8_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CI-MESA: 
[[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) - ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; 
CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) - ; GFX9-MESA-LABEL: name: test_load_flat_v16s8_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; 
GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) - ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) - ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[COPY15]](s32), [[COPY16]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -5445,192 +3583,6 @@ body: | ; GFX9: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>) ; GFX9: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) - ; CI-MESA-LABEL: name: test_load_flat_v32s8_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-MESA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) 
= G_LSHR [[UV]], [[C2]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) - ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CI-MESA: 
[[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; CI-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; CI-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32) - ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; CI-MESA: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>) - ; CI-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; CI-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; CI-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; CI-MESA: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>) - ; CI-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) 
- ; CI-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; CI-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32) - ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; CI-MESA: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>) - ; CI-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) - ; CI-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) - ; CI-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32) - ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; CI-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; CI-MESA: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) - ; GFX9-MESA-LABEL: name: test_load_flat_v32s8_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = 
G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-MESA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; GFX9-MESA: 
[[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) - ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) - ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32) - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 
x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) - ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; GFX9-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; GFX9-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32) - ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32) - ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x s16>) - ; GFX9-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; GFX9-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; GFX9-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) - ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>) - ; GFX9-MESA: 
[[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x s16>) - ; GFX9-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; GFX9-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; GFX9-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32) - ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32) - ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; GFX9-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x s16>) - ; GFX9-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) - ; GFX9-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) - ; GFX9-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32) - ; GFX9-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; GFX9-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32) - ; GFX9-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; GFX9-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = 
G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -5655,14 +3607,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load 4) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v2s16_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s16_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0 = COPY %1 @@ -5716,32 +3660,6 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v2s16_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s16_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 0) $vgpr0 = COPY %1 @@ -5850,71 +3768,6 @@ body: | ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v2s16_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s16_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 0) $vgpr0 = COPY %1 @@ -5950,22 +3803,6 @@ body: | ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), 
[[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v3s16_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 0) %2:_(<4 x s16>) = G_IMPLICIT_DEF @@ -5997,18 +3834,6 @@ body: | ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 
= COPY [[INSERT]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v3s16_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 - ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 4, addrspace 0) %2:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6115,66 +3940,6 @@ body: | ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v3s16_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; 
CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x 
s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-MESA: 
[[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 0) %2:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6359,121 +4124,6 @@ body: | ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v3s16_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: 
[[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 - ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 0) %2:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6499,14 +4149,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v4s16_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s16_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -6530,14 +4172,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v4s16_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s16_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -6629,56 +4263,6 @@ body: | ; GFX9: 
[[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v4s16_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = 
G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s16_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -6870,127 +4454,6 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v4s16_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-MESA: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s16_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; 
GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -7014,14 +4477,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) - ; CI-MESA-LABEL: name: test_load_flat_v8s16_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) - ; GFX9-MESA-LABEL: name: test_load_flat_v8s16_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7045,14 +4500,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v2s32_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; 
GFX9-MESA-LABEL: name: test_load_flat_v2s32_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -7076,14 +4523,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v2s32_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s32_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -7108,14 +4547,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v2s32_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s32_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 0) 
$vgpr0_vgpr1 = COPY %1 @@ -7139,14 +4570,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v3s32_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s32_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -7172,14 +4595,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v3s32_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s32_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -7203,14 +4618,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-MESA-LABEL: name: 
test_load_flat_v4s32_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s32_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7234,14 +4641,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v4s32_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s32_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7265,14 +4664,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v4s32_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; 
GFX9-MESA-LABEL: name: test_load_flat_v4s32_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7308,22 +4699,6 @@ body: | ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v8s32_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) - ; GFX9-MESA-LABEL: name: test_load_flat_v8s32_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) %0:_(p0) = COPY 
$vgpr0_vgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -7347,14 +4722,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) - ; CI-MESA-LABEL: name: test_load_flat_v16s32_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) - ; GFX9-MESA-LABEL: name: test_load_flat_v16s32_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 16, align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -7378,14 +4745,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v2s64_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s64_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: 
(load 16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7409,14 +4768,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v2s64_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s64_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7440,14 +4791,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v2s64_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s64_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7612,108 +4955,6 @@ body: | 
; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v2s64_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-MESA: 
[[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 + 8) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 + 10) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2 + 12) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2 + 14) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s64_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 + 6) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 + 8) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 + 10) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2 + 12) - 
; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2 + 14) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 2, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8077,246 +5318,6 @@ body: | ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v2s64_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) 
= G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], 
[[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: 
[[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] - ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) 
= COPY [[C8]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] - ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CI-MESA: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s64_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: 
[[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p0) = 
COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8364,30 +5365,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v3s64_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 + 16, align 16) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128 - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s64_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 + 16, align 16) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128 - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = 
G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -8437,30 +5414,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v3s64_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 + 16) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128 - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s64_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 + 16) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = 
G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128 - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 8, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -8999,364 +5952,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v3s64_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: 
[[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: 
(load 1 + 12) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] - ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: 
[[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] - ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 + 16) - ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 + 17) - ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 + 18) - ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 + 19) - ; CI-MESA: 
[[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 + 20) - ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 + 21) - ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 + 22) - ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 + 23) - ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) - ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) - ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] - ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) - ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] - ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) - ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] - ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) - ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] - ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) - ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] 
- ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] - ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) - ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] - ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) - ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) - ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) - ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128 - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v3s64_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) 
- ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - 
; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: 
[[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) 
= G_OR [[AND10]], [[SHL7]] - ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 + 16) - ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 + 17) - ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[PTR_ADD15]], [[C1]](s64) - ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 + 18) - ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 + 19) - ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 + 20) - ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 + 21) - ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 + 22) - ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 + 23) - ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) - ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] - ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) - ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] - ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) - ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] - ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) - ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] - ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) - ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] - ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) - ; GFX9-MESA: 
[[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] - ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) - ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] - ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) - ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] - ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) - ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) - ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128 - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 1, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -9394,22 +5989,6 @@ body: | ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x 
s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v4s64_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s64_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -9445,22 +6024,6 @@ body: | ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16, align 8) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v4s64_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY 
$vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16, align 8) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s64_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16, align 8) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -10143,464 +6706,6 @@ body: | ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) - ; CI-MESA-LABEL: name: test_load_flat_v4s64_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] 
- ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] - ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; CI-MESA: 
[[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] - ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; 
CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 + 16) - ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 + 17) - ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 + 18) - ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 + 19) - ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 + 20) - ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 + 21) - ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 + 22) - ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 + 23) - ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) - ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) - ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI-MESA: 
[[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] - ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) - ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] - ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) - ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] - ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) - ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] - ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) - ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] - ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) - ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] - ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) - ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) - ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR15]](s16) - ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) - ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; CI-MESA: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C11]](s64) - ; CI-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load 1 + 24) - ; CI-MESA: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; CI-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load 1 + 25) - ; CI-MESA: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; CI-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load 1 + 26) - ; CI-MESA: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; CI-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load 1 + 27) - ; CI-MESA: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; CI-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load 1 + 28) - ; CI-MESA: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; CI-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load 1 + 29) - ; CI-MESA: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; CI-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load 1 + 30) - ; CI-MESA: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; CI-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load 1 + 31) - ; CI-MESA: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) - ; CI-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] - ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD25]](s32) - ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]] - ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) - ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) - ; 
CI-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] - ; CI-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) - ; CI-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] - ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD27]](s32) - ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]] - ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) - ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) - ; CI-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] - ; CI-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) - ; CI-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] - ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LOAD29]](s32) - ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) - ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) - ; CI-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] - ; CI-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) - ; CI-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] - ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LOAD31]](s32) - ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) - ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) - ; CI-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] - ; CI-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) - ; CI-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) - ; CI-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32) - ; CI-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] - ; CI-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) - ; CI-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR21]](s16) - ; CI-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32) - ; CI-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] - ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) - ; GFX9-MESA-LABEL: name: test_load_flat_v4s64_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR]](s16) - ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; GFX9-MESA: 
[[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] - ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] - ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] - ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] - ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; 
GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 + 16) - ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 + 17) - ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 + 18) - ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 + 19) - ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 + 20) - ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 + 21) - ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 + 22) - ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 + 23) - ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) - ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] - ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) - ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: 
[[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] - ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) - ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] - ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) - ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] - ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) - ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] - ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) - ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] - ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) - ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] - ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) - ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] - ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) - ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) - ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; GFX9-MESA: 
[[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C10]](s64) - ; GFX9-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load 1 + 24) - ; GFX9-MESA: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; GFX9-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load 1 + 25) - ; GFX9-MESA: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; GFX9-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load 1 + 26) - ; GFX9-MESA: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; GFX9-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load 1 + 27) - ; GFX9-MESA: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; GFX9-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load 1 + 28) - ; GFX9-MESA: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; GFX9-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load 1 + 29) - ; GFX9-MESA: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; GFX9-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load 1 + 30) - ; GFX9-MESA: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; GFX9-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load 1 + 31) - ; GFX9-MESA: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) - ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] - ; GFX9-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) - ; GFX9-MESA: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; GFX9-MESA: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; GFX9-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] - ; GFX9-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) - ; GFX9-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] - ; GFX9-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) - ; GFX9-MESA: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], 
[[C7]] - ; GFX9-MESA: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; GFX9-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] - ; GFX9-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) - ; GFX9-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] - ; GFX9-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) - ; GFX9-MESA: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; GFX9-MESA: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; GFX9-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] - ; GFX9-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) - ; GFX9-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] - ; GFX9-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) - ; GFX9-MESA: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; GFX9-MESA: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; GFX9-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] - ; GFX9-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) - ; GFX9-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) - ; GFX9-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) - ; GFX9-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] - ; GFX9-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) - ; GFX9-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) - ; GFX9-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) - ; GFX9-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] - ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 1, 
addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -10639,24 +6744,6 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) - ; CI-MESA-LABEL: name: test_load_flat_v2s128_align32 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2s128_align32 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -10683,16 +6770,6 @@ 
body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; CI-MESA-LABEL: name: test_load_flat_v2p1_align16 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2p1_align16 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -10719,16 +6796,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; CI-MESA-LABEL: name: test_load_flat_v2p1_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2p1_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = 
G_LOAD %0 :: (load 16, align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -10755,16 +6822,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; CI-MESA-LABEL: name: test_load_flat_v2p1_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2p1_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11082,210 +7139,6 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; CI-MESA-LABEL: name: test_load_flat_v2p1_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: 
(load 1 + 7) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY 
[[LOAD10]](s32) - ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], 
[[C6]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2p1_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: 
[[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 + 8) - ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 + 9) - ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 + 10) - ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 + 11) - ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 + 12) - ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 + 13) - ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[PTR_ADD11]], [[C1]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 + 14) - ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 + 15) - ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 1, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11309,14 +7162,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; CI-MESA-LABEL: name: test_load_flat_v2p3_align8 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: 
(load 8) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2p3_align8 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11340,14 +7185,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; CI-MESA-LABEL: name: test_load_flat_v2p3_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2p3_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11530,120 +7367,6 @@ body: | ; GFX9: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) - ; CI-MESA-LABEL: name: test_load_flat_v2p3_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - 
; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) - ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; CI-MESA: 
[[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CI-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) - ; GFX9-MESA-LABEL: name: test_load_flat_v2p3_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; 
GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) - ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) - ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 + 4) - ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 + 5) - ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 + 6) - ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 + 7) - ; GFX9-MESA: 
[[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; GFX9-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11667,14 +7390,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) - ; CI-MESA-LABEL: name: test_ext_load_flat_s32_from_1_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) - ; GFX9-MESA-LABEL: name: test_ext_load_flat_s32_from_1_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 1, align 4, 
addrspace 0) $vgpr0 = COPY %1 @@ -11698,14 +7413,6 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) - ; CI-MESA-LABEL: name: test_ext_load_flat_s32_from_2_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) - ; GFX9-MESA-LABEL: name: test_ext_load_flat_s32_from_2_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 2, align 4, addrspace 0) $vgpr0 = COPY %1 @@ -11733,16 +7440,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11769,16 +7466,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: 
test_ext_load_flat_s64_from_2_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11805,16 +7492,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_4_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_4_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11850,22 +7527,6 @@ body: | ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) - ; CI-MESA-LABEL: name: test_ext_load_flat_s128_from_4_align4 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) - ; GFX9-MESA-LABEL: name: test_ext_load_flat_s128_from_4_align4 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11892,16 +7553,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align2 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align2 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ 
-11928,16 +7579,6 @@ body: | ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align1 - ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align1 - ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 From 60df7c08b1f4447309c0c07fec1c8bc7267562fc Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 28 Dec 2020 12:20:51 +0300 Subject: [PATCH 14/86] [obj2yaml,yaml2obj] - Fix issues with creating/dumping group sections. We have the following issues related to group sections: 1) yaml2obj is unable to set the custom `sh_entsize` value, because the `EntSize` key is currently ignored. 2) obj2yaml is unable to dump the group section which `sh_entsize != 4`. 3) obj2yaml always dumps the "EntSize" for group sections, though usually we are trying to omit dumping default values when dumping keys. I.e. we should not print the "EntSize" key when `sh_entsize` == 4. This patch fixes (1),(3) and adds the test case to document the behavior of (2). 
Differential revision: https://reviews.llvm.org/D93854 --- llvm/lib/ObjectYAML/ELFEmitter.cpp | 5 +++- .../tools/obj2yaml/ELF/section-group.yaml | 29 +++++++++++++------ llvm/test/tools/yaml2obj/ELF/group.yaml | 14 +++++++-- llvm/tools/obj2yaml/elf2yaml.cpp | 2 ++ 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 010a881379f31a..181b130de621f8 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1275,7 +1275,10 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, SN2I.lookup(".symtab", Link)) SHeader.sh_link = Link; - SHeader.sh_entsize = 4; + if (Section.EntSize) + SHeader.sh_entsize = *Section.EntSize; + else + SHeader.sh_entsize = sizeof(typename ELFT::Word); if (Section.Signature) SHeader.sh_info = diff --git a/llvm/test/tools/obj2yaml/ELF/section-group.yaml b/llvm/test/tools/obj2yaml/ELF/section-group.yaml index 33044ceeb36c4d..bdd65908992d15 100644 --- a/llvm/test/tools/obj2yaml/ELF/section-group.yaml +++ b/llvm/test/tools/obj2yaml/ELF/section-group.yaml @@ -1,13 +1,15 @@ ## Checks that the tool is able to read section groups from ELF. +## Check how groups sections are dumped. +## Check we don't dump the "EntSize" key when sh_entsize == 4. 
+ # RUN: yaml2obj %s -o %t1.o # RUN: obj2yaml %t1.o | FileCheck %s -DSEC=.rodata -# CHECK: - Name: .group -# CHECK-NEXT: Type: SHT_GROUP -# CHECK-NEXT: Link: .symtab -# CHECK-NEXT: EntSize: 0x4 -# CHECK-NEXT: Info: signature +# CHECK: - Name: .group +# CHECK-NEXT: Type: SHT_GROUP +# CHECK-NEXT: Link: .symtab +# CHECK-NEXT: Info: signature # CHECK-NEXT: Members: # CHECK-NEXT: - SectionOrType: GRP_COMDAT # CHECK-NEXT: - SectionOrType: [[SEC]] @@ -19,10 +21,11 @@ FileHeader: Data: ELFDATA2LSB Type: ET_REL Sections: - - Name: .group - Type: SHT_GROUP - Link: .symtab - Info: [[INFO=signature]] + - Name: .group + Type: SHT_GROUP + Link: .symtab + Info: [[INFO=signature]] + EntSize: [[ENTSIZE=]] Members: - SectionOrType: GRP_COMDAT - SectionOrType: [[SEC=.rodata]] @@ -33,6 +36,14 @@ Symbols: Type: STT_OBJECT Section: .rodata +## Document that yaml2obj can't dump the SHT_GROUP section when its sh_entsize != 4. + +# RUN: yaml2obj %s -DENTSIZE=0xfe -o %t1.entsize.o +# RUN: not obj2yaml %t1.entsize.o 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t1.entsize.o --check-prefix=ENTSIZE + +# ENTSIZE: Error reading file: [[FILE]]: section [index 1] has invalid sh_entsize: expected 4, but got 254 + ## Check we are able to dump members of the SHT_GROUP section even when ## one of them has section index 0. diff --git a/llvm/test/tools/yaml2obj/ELF/group.yaml b/llvm/test/tools/yaml2obj/ELF/group.yaml index 56794d9f14d7ce..1f061ade5769ae 100644 --- a/llvm/test/tools/yaml2obj/ELF/group.yaml +++ b/llvm/test/tools/yaml2obj/ELF/group.yaml @@ -19,6 +19,7 @@ Sections: Type: SHT_GROUP Link: 0x1 Info: 0x2 + EntSize: [[ENTSIZE=]] Size: [[SIZE=]] Content: [[CONTENT=]] Members: [[MEMBERS=]] @@ -68,10 +69,19 @@ Sections: # MEMBERS-ERR: error: "Members" cannot be used with "Content" or "Size" ## Check we create an empty section when none of "Size", "Content" or "Members" are specified. +## Check that the default value of sh_entsize is 4. 
# RUN: yaml2obj %s -o %t.empty.o # RUN: llvm-readelf --sections --section-data %t.empty.o | \ # RUN: FileCheck %s --check-prefix=EMPTY-SEC -# EMPTY-SEC: [Nr] Name Type Address Off Size -# EMPTY-SEC: [ 1] .group GROUP 0000000000000000 000040 000000 +# EMPTY-SEC: [Nr] Name Type Address Off Size ES Flg +# EMPTY-SEC: [ 1] .group GROUP 0000000000000000 000040 000000 04 1 + +## Check that we are able to set an arbitrary entry size for the group section. + +# RUN: yaml2obj %s -DENTSIZE=0xFE -o %t.entsize.o +# RUN: llvm-readelf --sections %t.entsize.o | FileCheck %s --check-prefix=ENTSIZE + +# ENTSIZE: [Nr] Name Type Address Off Size ES Flg +# ENTSIZE: [ 1] .group GROUP 0000000000000000 000040 000000 fe 1 diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index dacbaaf482c03a..f29b1ebca7deeb 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -725,6 +725,8 @@ template static unsigned getDefaultShEntSize(ELFYAML::ELF_SHT SecType, StringRef SecName) { switch (SecType) { + case ELF::SHT_GROUP: + return sizeof(typename ELFT::Word); case ELF::SHT_REL: return sizeof(typename ELFT::Rel); case ELF::SHT_RELA: From c15a57cc1a86bfb72f4fa0e7d265494babc3b412 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 22 Dec 2020 17:36:16 +0300 Subject: [PATCH 15/86] [obj2yaml] - Don't crash when an object has an empty symbol table. Currently we crash when we have an object with SHT_SYMTAB/SHT_DYNSYM sections of size 0. With this patch instead of the crash we start to dump them properly. 
Differential revision: https://reviews.llvm.org/D93697 --- llvm/test/tools/obj2yaml/ELF/no-symtab.yaml | 34 ++++++++++++++ llvm/tools/obj2yaml/elf2yaml.cpp | 49 ++++++++++++--------- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml b/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml index 8f9fb828564521..132ddfbbc321dd 100644 --- a/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml +++ b/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml @@ -37,3 +37,37 @@ FileHeader: Data: ELFDATA2LSB Type: ET_DYN Symbols: [] + +## A symbol table without the null entry is non-conforming. +## Check we don't print "Symbols" and "DynamicSymbols" keys in this case. + +# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: obj2yaml %t3 | FileCheck %s --check-prefix=EMPTY + +# EMPTY: Sections: +# EMPTY-NEXT: - Name: .symtab +# EMPTY-NEXT: Type: SHT_SYMTAB +# EMPTY-NEXT: Link: .strtab +## TODO: we shouldn't dump the default "EntSize" value. +# EMPTY-NEXT: EntSize: 0x18 +# EMPTY-NEXT: Size: 0x0 +# EMPTY-NEXT: - Name: .dynsym +# EMPTY-NEXT: Type: SHT_DYNSYM +# EMPTY-NEXT: Flags: [ SHF_ALLOC ] +## TODO: we shouldn't dump the default "EntSize" value. +# EMPTY-NEXT: EntSize: 0x18 +# EMPTY-NEXT: Size: 0x0 +# EMPTY-NEXT: ... 
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .symtab + Type: SHT_SYMTAB + Size: 0 + - Name: .dynsym + Type: SHT_DYNSYM + Size: 0 diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index f29b1ebca7deeb..89bbee49657a56 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -55,7 +55,7 @@ class ELFDumper { dumpDWARFSections(std::vector> &Sections); Error dumpSymbols(const Elf_Shdr *Symtab, - std::vector &Symbols); + Optional> &Symbols); Error dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab, StringRef StrTable, ELFYAML::Symbol &S); Expected>> dumpSections(); @@ -219,9 +219,12 @@ bool ELFDumper::shouldPrintSection(const ELFYAML::Section &S, // Generally we are trying to reduce noise in the YAML output. Because // of that we do not print non-allocatable versions of such sections and // assume they are placed at the end. + // We also dump symbol tables when the Size field is set. It happens when they + // are empty, which should not normally happen. if (S.Type == ELF::SHT_STRTAB || S.Type == ELF::SHT_SYMTAB || - S.Type == ELF::SHT_DYNSYM) - return S.Flags.getValueOr(ELFYAML::ELF_SHF(0)) & ELF::SHF_ALLOC; + S.Type == ELF::SHT_DYNSYM) { + return S.Size || S.Flags.getValueOr(ELFYAML::ELF_SHF(0)) & ELF::SHF_ALLOC; + } return true; } @@ -325,17 +328,13 @@ template Expected ELFDumper::dump() { } } - if (SymTab) { - Y->Symbols.emplace(); - if (Error E = dumpSymbols(SymTab, *Y->Symbols)) + if (SymTab) + if (Error E = dumpSymbols(SymTab, Y->Symbols)) return std::move(E); - } - if (DynSymTab) { - Y->DynamicSymbols.emplace(); - if (Error E = dumpSymbols(DynSymTab, *Y->DynamicSymbols)) + if (DynSymTab) + if (Error E = dumpSymbols(DynSymTab, Y->DynamicSymbols)) return std::move(E); - } // We dump all sections first. It is simple and allows us to verify that all // sections are valid and also to generalize the code. 
But we are not going to @@ -516,6 +515,13 @@ ELFDumper::dumpPlaceholderSection(const Elf_Shdr *Shdr) { auto S = std::make_unique(); if (Error E = dumpCommonSection(Shdr, *S.get())) return std::move(E); + + // Normally symbol tables should not be empty. We dump the "Size" + // key when they are. + if ((Shdr->sh_type == ELF::SHT_SYMTAB || Shdr->sh_type == ELF::SHT_DYNSYM) && + !Shdr->sh_size) + S->Size.emplace(); + return S.release(); } @@ -621,30 +627,33 @@ ELFDumper::dumpSections() { } template -Error ELFDumper::dumpSymbols(const Elf_Shdr *Symtab, - std::vector &Symbols) { +Error ELFDumper::dumpSymbols( + const Elf_Shdr *Symtab, Optional> &Symbols) { if (!Symtab) return Error::success(); - auto StrTableOrErr = Obj.getStringTableForSymtab(*Symtab); - if (!StrTableOrErr) - return StrTableOrErr.takeError(); - StringRef StrTable = *StrTableOrErr; - auto SymtabOrErr = Obj.symbols(Symtab); if (!SymtabOrErr) return SymtabOrErr.takeError(); + if (SymtabOrErr->empty()) + return Error::success(); + + auto StrTableOrErr = Obj.getStringTableForSymtab(*Symtab); + if (!StrTableOrErr) + return StrTableOrErr.takeError(); + if (Symtab->sh_type == ELF::SHT_SYMTAB) { SymTable = *SymtabOrErr; SymbolNames.resize(SymTable.size()); } + Symbols.emplace(); for (const auto &Sym : (*SymtabOrErr).drop_front()) { ELFYAML::Symbol S; - if (auto EC = dumpSymbol(&Sym, Symtab, StrTable, S)) + if (auto EC = dumpSymbol(&Sym, Symtab, *StrTableOrErr, S)) return EC; - Symbols.push_back(S); + Symbols->push_back(S); } return Error::success(); From a06aa1037c17dd3d60f5202b9877d8988e463353 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 12 Jan 2021 06:30:32 -0500 Subject: [PATCH 16/86] Revert "[Test] Add failing test for PR48725" This reverts commit e8287cb2b2923af9da72fd953e2ec5495c33861a. Test unexpectedly passes on mac, see comment 2 on PR48725. 
--- .../Transforms/LoopStrengthReduce/pr48725.ll | 102 ------------------ 1 file changed, 102 deletions(-) delete mode 100644 llvm/test/Transforms/LoopStrengthReduce/pr48725.ll diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll b/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll deleted file mode 100644 index ef25b92ffd1c0b..00000000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: opt -S -loop-reduce < %s | FileCheck %s -; XFAIL: * - -source_filename = "./simple.ll" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" -target triple = "x86_64-unknown-linux-gnu" - -; CHECK-LABEL: test -define void @test() { -bb: - br label %bb1 - -bb1: ; preds = %bb1, %bb - %tmp = phi i32 [ undef, %bb ], [ %tmp87, %bb1 ] - %tmp2 = phi i32 [ undef, %bb ], [ %tmp86, %bb1 ] - %tmp3 = mul i32 %tmp, undef - %tmp4 = xor i32 %tmp3, -1 - %tmp5 = add i32 %tmp, %tmp4 - %tmp6 = add i32 %tmp2, -1 - %tmp7 = add i32 %tmp5, %tmp6 - %tmp8 = mul i32 %tmp7, %tmp3 - %tmp9 = xor i32 %tmp8, -1 - %tmp10 = add i32 %tmp7, %tmp9 - %tmp11 = add i32 %tmp10, undef - %tmp12 = mul i32 %tmp11, %tmp8 - %tmp13 = xor i32 %tmp12, -1 - %tmp14 = add i32 %tmp11, %tmp13 - %tmp15 = add i32 %tmp14, undef - %tmp16 = mul i32 %tmp15, %tmp12 - %tmp17 = add i32 %tmp15, undef - %tmp18 = add i32 %tmp17, undef - %tmp19 = mul i32 %tmp18, %tmp16 - %tmp20 = xor i32 %tmp19, -1 - %tmp21 = add i32 %tmp18, %tmp20 - %tmp22 = add i32 %tmp21, undef - %tmp23 = mul i32 %tmp22, %tmp19 - %tmp24 = xor i32 %tmp23, -1 - %tmp25 = add i32 %tmp22, %tmp24 - %tmp26 = add i32 %tmp25, undef - %tmp27 = mul i32 %tmp26, %tmp23 - %tmp28 = xor i32 %tmp27, -1 - %tmp29 = add i32 %tmp26, %tmp28 - %tmp30 = add i32 %tmp29, undef - %tmp31 = mul i32 %tmp30, %tmp27 - %tmp32 = xor i32 %tmp31, -1 - %tmp33 = add i32 %tmp30, %tmp32 - %tmp34 = add i32 %tmp33, undef - %tmp35 = mul i32 %tmp34, %tmp31 - %tmp36 = xor i32 %tmp35, -1 - %tmp37 = 
add i32 %tmp34, %tmp36 - %tmp38 = add i32 %tmp2, -9 - %tmp39 = add i32 %tmp37, %tmp38 - %tmp40 = mul i32 %tmp39, %tmp35 - %tmp41 = xor i32 %tmp40, -1 - %tmp42 = add i32 %tmp39, %tmp41 - %tmp43 = add i32 %tmp42, undef - %tmp44 = mul i32 %tmp43, %tmp40 - %tmp45 = xor i32 %tmp44, -1 - %tmp46 = add i32 %tmp43, %tmp45 - %tmp47 = add i32 %tmp46, undef - %tmp48 = mul i32 %tmp47, %tmp44 - %tmp49 = xor i32 %tmp48, -1 - %tmp50 = add i32 %tmp47, %tmp49 - %tmp51 = add i32 %tmp50, undef - %tmp52 = mul i32 %tmp51, %tmp48 - %tmp53 = xor i32 %tmp52, -1 - %tmp54 = add i32 %tmp51, %tmp53 - %tmp55 = add i32 %tmp54, undef - %tmp56 = mul i32 %tmp55, %tmp52 - %tmp57 = xor i32 %tmp56, -1 - %tmp58 = add i32 %tmp55, %tmp57 - %tmp59 = add i32 %tmp2, -14 - %tmp60 = add i32 %tmp58, %tmp59 - %tmp61 = mul i32 %tmp60, %tmp56 - %tmp62 = xor i32 %tmp61, -1 - %tmp63 = add i32 %tmp60, %tmp62 - %tmp64 = add i32 %tmp63, undef - %tmp65 = mul i32 %tmp64, %tmp61 - %tmp66 = xor i32 %tmp65, -1 - %tmp67 = add i32 %tmp64, %tmp66 - %tmp68 = add i32 %tmp67, undef - %tmp69 = mul i32 %tmp68, %tmp65 - %tmp70 = xor i32 %tmp69, -1 - %tmp71 = add i32 %tmp68, %tmp70 - %tmp72 = add i32 %tmp71, undef - %tmp73 = mul i32 %tmp72, %tmp69 - %tmp74 = xor i32 %tmp73, -1 - %tmp75 = add i32 %tmp72, %tmp74 - %tmp76 = add i32 %tmp75, undef - %tmp77 = mul i32 %tmp76, %tmp73 - %tmp78 = xor i32 %tmp77, -1 - %tmp79 = add i32 %tmp76, %tmp78 - %tmp80 = add i32 %tmp79, undef - %tmp81 = mul i32 %tmp80, %tmp77 - %tmp82 = xor i32 %tmp81, -1 - %tmp83 = add i32 %tmp80, %tmp82 - %tmp84 = add i32 %tmp83, undef - %tmp85 = add i32 %tmp84, undef - %tmp86 = add i32 %tmp2, -21 - %tmp87 = add i32 %tmp85, %tmp86 - br label %bb1 -} From ace516fb33d1f3de85f046e96efc1048b4ee8c08 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Thu, 7 Jan 2021 09:41:36 +0100 Subject: [PATCH 17/86] Change the LLVM_ATTRIBUTE_DEPRECATED macro to use C++14 attribute. 
C++14 attributes are superior because they can be applied to functions with inline definition and the syntax is cleaner. I intend to convert all uses and then remove the macro. One issue that might hold back switching uses to C++14 attributes is that clang-format does not put long attributes on separate lines and formatted code will look like: ``` template [[deprecated("blah blah")]] void foooooooooooooooooooooooooooo() { ... } ``` Putting long attributes on a separate line would be prettier. See https://stackoverflow.com/questions/45740466/clang-format-setting-to-control-c-attributes AttributeMacros probably won't help because it can't match the custom message. https://clang.llvm.org/docs/ClangFormatStyleOptions.html Reviewed By: rriddle, MaskRay Differential Revision: https://reviews.llvm.org/D94219 --- llvm/include/llvm/Support/Compiler.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index a9e4f7f8353d93..9348ada9132509 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -314,19 +314,9 @@ #endif // LLVM_ATTRIBUTE_DEPRECATED(decl, "message") -#if __has_feature(attribute_deprecated_with_message) -# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ - decl __attribute__((deprecated(message))) -#elif defined(__GNUC__) -# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ - decl __attribute__((deprecated)) -#elif defined(_MSC_VER) -# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ - __declspec(deprecated(message)) decl -#else -# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ - decl -#endif +// This macro will be removed. 
+// Use C++14's attribute instead: [[deprecated("message")]] +#define LLVM_ATTRIBUTE_DEPRECATED(decl, message) [[deprecated(message)]] decl /// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands /// to an expression which states that it is undefined behavior for the From 09db958e37b3a51942827a48a4b2f453e8fb4737 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Fri, 8 Jan 2021 17:14:08 +0000 Subject: [PATCH 18/86] [RISCV] Improve scalable-vector shift tests (NFC) All i8/i16 and several i32 tests were testing immediate shift amounts which exceeded the bits in the vector elements, creating poison values. Amend the tests to test well-behaved shift amounts. --- .../CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll | 286 ++---------------- .../CodeGen/RISCV/rvv/vshl-sdnode-rv64.ll | 286 ++---------------- .../CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll | 286 ++---------------- .../CodeGen/RISCV/rvv/vsra-sdnode-rv64.ll | 286 ++---------------- .../CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll | 286 ++---------------- .../CodeGen/RISCV/rvv/vsrl-sdnode-rv64.ll | 286 ++---------------- 6 files changed, 156 insertions(+), 1560 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll index 34d055777cc3f9..b1490c8fb0f89a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll @@ -17,22 +17,9 @@ define @vshl_vx_nxv1i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv1i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv1i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv1i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - 
%head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -54,22 +41,9 @@ define @vshl_vx_nxv2i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv2i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv2i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv2i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -91,22 +65,9 @@ define @vshl_vx_nxv4i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv4i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv4i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv4i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -128,22 +89,9 @@ define @vshl_vx_nxv8i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv8i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, 
zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv8i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv8i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -165,22 +113,9 @@ define @vshl_vx_nxv16i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv16i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv16i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv16i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -202,22 +137,9 @@ define @vshl_vx_nxv32i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv32i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv32i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv32i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl 
%va, %splat ret %vc @@ -239,22 +161,9 @@ define @vshl_vx_nxv64i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv64i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv64i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv64i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -276,22 +185,9 @@ define @vshl_vx_nxv1i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv1i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv1i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv1i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -313,22 +209,9 @@ define @vshl_vx_nxv2i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv2i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv2i16_1( %va) { -; CHECK-LABEL: 
vshl_vx_nxv2i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -350,22 +233,9 @@ define @vshl_vx_nxv4i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv4i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv4i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv4i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -387,22 +257,9 @@ define @vshl_vx_nxv8i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv8i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv8i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv8i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -424,22 +281,9 @@ define @vshl_vx_nxv16i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv16i16_0: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv16i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv16i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -461,22 +305,9 @@ define @vshl_vx_nxv32i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv32i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv32i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv32i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -506,19 +337,6 @@ define @vshl_vx_nxv1i32_0( %va) { ret %vc } -define @vshl_vx_nxv1i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv1i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv2i32( %va, i32 %b) { ; CHECK-LABEL: vshl_vx_nxv2i32: ; CHECK: # %bb.0: @@ -543,19 +361,6 @@ 
define @vshl_vx_nxv2i32_0( %va) { ret %vc } -define @vshl_vx_nxv2i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv2i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv4i32( %va, i32 %b) { ; CHECK-LABEL: vshl_vx_nxv4i32: ; CHECK: # %bb.0: @@ -580,19 +385,6 @@ define @vshl_vx_nxv4i32_0( %va) { ret %vc } -define @vshl_vx_nxv4i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv4i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv8i32( %va, i32 %b) { ; CHECK-LABEL: vshl_vx_nxv8i32: ; CHECK: # %bb.0: @@ -617,19 +409,6 @@ define @vshl_vx_nxv8i32_0( %va) { ret %vc } -define @vshl_vx_nxv8i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv8i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv16i32( %va, i32 %b) { ; CHECK-LABEL: vshl_vx_nxv16i32: ; CHECK: # %bb.0: @@ -654,19 +433,6 @@ define @vshl_vx_nxv16i32_0( %va) { ret %vc } -define @vshl_vx_nxv16i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv16i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv1i64( %va, i64 %b) 
{ ; CHECK-LABEL: vshl_vx_nxv1i64: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv64.ll index 23c51934743f08..5480f48f26ca8e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv64.ll @@ -17,22 +17,9 @@ define @vshl_vx_nxv1i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv1i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv1i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv1i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -54,22 +41,9 @@ define @vshl_vx_nxv2i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv2i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv2i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv2i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -91,22 +65,9 @@ define @vshl_vx_nxv4i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv4i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu -; 
CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv4i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv4i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -128,22 +89,9 @@ define @vshl_vx_nxv8i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv8i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv8i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv8i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -165,22 +113,9 @@ define @vshl_vx_nxv16i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv16i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv16i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv16i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = 
insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -202,22 +137,9 @@ define @vshl_vx_nxv32i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv32i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv32i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv32i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -239,22 +161,9 @@ define @vshl_vx_nxv64i8_0( %va) { ; CHECK-LABEL: vshl_vx_nxv64i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv64i8_1( %va) { -; CHECK-LABEL: vshl_vx_nxv64i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -276,22 +185,9 @@ define @vshl_vx_nxv1i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv1i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, 
zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv1i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv1i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -313,22 +209,9 @@ define @vshl_vx_nxv2i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv2i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv2i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv2i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -350,22 +233,9 @@ define @vshl_vx_nxv4i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv4i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv4i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv4i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, 
zeroinitializer %vc = shl %va, %splat ret %vc @@ -387,22 +257,9 @@ define @vshl_vx_nxv8i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv8i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv8i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv8i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -424,22 +281,9 @@ define @vshl_vx_nxv16i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv16i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define @vshl_vx_nxv16i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv16i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -461,22 +305,9 @@ define @vshl_vx_nxv32i16_0( %va) { ; CHECK-LABEL: vshl_vx_nxv32i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu -; CHECK-NEXT: vsll.vi v16, v16, 31 +; CHECK-NEXT: vsll.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - -define 
@vshl_vx_nxv32i16_1( %va) { -; CHECK-LABEL: vshl_vx_nxv32i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = shl %va, %splat ret %vc @@ -506,19 +337,6 @@ define @vshl_vx_nxv1i32_0( %va) { ret %vc } -define @vshl_vx_nxv1i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv1i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv2i32( %va, i32 signext %b) { ; CHECK-LABEL: vshl_vx_nxv2i32: ; CHECK: # %bb.0: @@ -543,19 +361,6 @@ define @vshl_vx_nxv2i32_0( %va) { ret %vc } -define @vshl_vx_nxv2i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv2i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv4i32( %va, i32 signext %b) { ; CHECK-LABEL: vshl_vx_nxv4i32: ; CHECK: # %bb.0: @@ -580,19 +385,6 @@ define @vshl_vx_nxv4i32_0( %va) { ret %vc } -define @vshl_vx_nxv4i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv4i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv8i32( %va, i32 signext %b) { ; CHECK-LABEL: vshl_vx_nxv8i32: ; CHECK: # %bb.0: @@ -617,19 +409,6 @@ define 
@vshl_vx_nxv8i32_0( %va) { ret %vc } -define @vshl_vx_nxv8i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv8i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv16i32( %va, i32 signext %b) { ; CHECK-LABEL: vshl_vx_nxv16i32: ; CHECK: # %bb.0: @@ -654,19 +433,6 @@ define @vshl_vx_nxv16i32_0( %va) { ret %vc } -define @vshl_vx_nxv16i32_1( %va) { -; CHECK-LABEL: vshl_vx_nxv16i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vsll.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = shl %va, %splat - ret %vc -} - define @vshl_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vshl_vx_nxv1i64: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll index ed5ca9025f846a..3d8a203a94c8e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll @@ -27,22 +27,9 @@ define @vsra_vi_nxv1i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv1i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv1i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv1i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, 
undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -74,22 +61,9 @@ define @vsra_vi_nxv2i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv2i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv2i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv2i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -121,22 +95,9 @@ define @vsra_vi_nxv4i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv4i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv4i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv4i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -168,22 +129,9 @@ define @vsra_vi_nxv8i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv8i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv8i8_1( %va) { -; CHECK-LABEL: 
vsra_vi_nxv8i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -215,22 +163,9 @@ define @vsra_vi_nxv16i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv16i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv16i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv16i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -262,22 +197,9 @@ define @vsra_vi_nxv32i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv32i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv32i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv32i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -310,22 +232,9 @@ define @vsra_vi_nxv64i8_0( %va) { ; CHECK-LABEL: 
vsra_vi_nxv64i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv64i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv64i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -357,22 +266,9 @@ define @vsra_vi_nxv1i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv1i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv1i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv1i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -404,22 +300,9 @@ define @vsra_vi_nxv2i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv2i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv2i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv2i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: 
vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -451,22 +334,9 @@ define @vsra_vi_nxv4i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv4i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv4i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv4i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -498,22 +368,9 @@ define @vsra_vi_nxv8i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv8i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv8i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv8i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -545,22 +402,9 @@ define @vsra_vi_nxv16i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv16i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu -; CHECK-NEXT: vsra.vi v16, 
v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv16i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv16i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -593,22 +437,9 @@ define @vsra_vi_nxv32i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv32i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv32i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv32i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -648,19 +479,6 @@ define @vsra_vi_nxv1i32_0( %va) { ret %vc } -define @vsra_vi_nxv1i32_1( %va) { -; CHECK-LABEL: vsra_vi_nxv1i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv2i32( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv2i32: ; CHECK: # %bb.0: @@ -695,19 +513,6 @@ define @vsra_vi_nxv2i32_0( %va) { ret %vc } -define @vsra_vi_nxv2i32_1( %va) { -; 
CHECK-LABEL: vsra_vi_nxv2i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv4i32( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv4i32: ; CHECK: # %bb.0: @@ -742,19 +547,6 @@ define @vsra_vi_nxv4i32_0( %va) { ret %vc } -define @vsra_vi_nxv4i32_1( %va) { -; CHECK-LABEL: vsra_vi_nxv4i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv8i32( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv8i32: ; CHECK: # %bb.0: @@ -789,19 +581,6 @@ define @vsra_vi_nxv8i32_0( %va) { ret %vc } -define @vsra_vi_nxv8i32_1( %va) { -; CHECK-LABEL: vsra_vi_nxv8i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv16i32( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv16i32: ; CHECK: # %bb.0: @@ -837,19 +616,6 @@ define @vsra_vi_nxv16i32_0( %va) { ret %vc } -define @vsra_vi_nxv16i32_1( %va) { -; CHECK-LABEL: vsra_vi_nxv16i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv1i64( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv1i64: ; CHECK: # %bb.0: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv64.ll index 550dcb57948569..68f5ba2cf620bf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv64.ll @@ -27,22 +27,9 @@ define @vsra_vi_nxv1i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv1i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv1i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv1i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -74,22 +61,9 @@ define @vsra_vi_nxv2i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv2i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv2i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv2i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -121,22 +95,9 @@ define @vsra_vi_nxv4i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv4i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi 
v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv4i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv4i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -168,22 +129,9 @@ define @vsra_vi_nxv8i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv8i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv8i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv8i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -215,22 +163,9 @@ define @vsra_vi_nxv16i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv16i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv16i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv16i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement 
undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -262,22 +197,9 @@ define @vsra_vi_nxv32i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv32i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv32i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv32i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -310,22 +232,9 @@ define @vsra_vi_nxv64i8_0( %va) { ; CHECK-LABEL: vsra_vi_nxv64i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv64i8_1( %va) { -; CHECK-LABEL: vsra_vi_nxv64i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -357,22 +266,9 @@ define @vsra_vi_nxv1i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv1i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - 
-define @vsra_vi_nxv1i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv1i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -404,22 +300,9 @@ define @vsra_vi_nxv2i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv2i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv2i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv2i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -451,22 +334,9 @@ define @vsra_vi_nxv4i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv4i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv4i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv4i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -498,22 
+368,9 @@ define @vsra_vi_nxv8i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv8i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv8i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv8i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -545,22 +402,9 @@ define @vsra_vi_nxv16i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv16i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv16i16_1( %va) { -; CHECK-LABEL: vsra_vi_nxv16i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -593,22 +437,9 @@ define @vsra_vi_nxv32i16_0( %va) { ; CHECK-LABEL: vsra_vi_nxv32i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu -; CHECK-NEXT: vsra.vi v16, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - -define @vsra_vi_nxv32i16_1( %va) { -; CHECK-LABEL: 
vsra_vi_nxv32i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = ashr %va, %splat ret %vc @@ -648,19 +479,6 @@ define @vsra_vi_nxv1i32_0( %va) { ret %vc } -define @vsra_vi_nxv1i32_1( %va) { -; CHECK-LABEL: vsra_vi_nxv1i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv2i32( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv2i32: ; CHECK: # %bb.0: @@ -695,19 +513,6 @@ define @vsra_vi_nxv2i32_0( %va) { ret %vc } -define @vsra_vi_nxv2i32_1( %va) { -; CHECK-LABEL: vsra_vi_nxv2i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv4i32( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv4i32: ; CHECK: # %bb.0: @@ -742,19 +547,6 @@ define @vsra_vi_nxv4i32_0( %va) { ret %vc } -define @vsra_vi_nxv4i32_1( %va) { -; CHECK-LABEL: vsra_vi_nxv4i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv8i32( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv8i32: ; CHECK: # %bb.0: @@ -789,19 +581,6 @@ define @vsra_vi_nxv8i32_0( %va) { ret %vc } -define @vsra_vi_nxv8i32_1( %va) { -; 
CHECK-LABEL: vsra_vi_nxv8i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv16i32( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv16i32: ; CHECK: # %bb.0: @@ -837,19 +616,6 @@ define @vsra_vi_nxv16i32_0( %va) { ret %vc } -define @vsra_vi_nxv16i32_1( %va) { -; CHECK-LABEL: vsra_vi_nxv16i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vsra.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = ashr %va, %splat - ret %vc -} - define @vsra_vv_nxv1i64( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv1i64: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll index 7d0b1af014debe..448c4f2ba93e28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll @@ -17,22 +17,9 @@ define @vsrl_vx_nxv1i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv1i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv1i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv1i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -54,22 +41,9 @@ define 
@vsrl_vx_nxv2i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv2i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv2i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv2i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -91,22 +65,9 @@ define @vsrl_vx_nxv4i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv4i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv4i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv4i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -128,22 +89,9 @@ define @vsrl_vx_nxv8i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv8i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv8i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv8i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, 
zero, e8,m1,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -165,22 +113,9 @@ define @vsrl_vx_nxv16i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv16i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv16i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv16i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -202,22 +137,9 @@ define @vsrl_vx_nxv32i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv32i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv32i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv32i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -239,22 +161,9 @@ define @vsrl_vx_nxv64i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv64i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu -; CHECK-NEXT: vsrl.vi 
v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv64i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv64i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -276,22 +185,9 @@ define @vsrl_vx_nxv1i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv1i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv1i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv1i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -313,22 +209,9 @@ define @vsrl_vx_nxv2i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv2i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv2i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv2i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = 
insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -350,22 +233,9 @@ define @vsrl_vx_nxv4i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv4i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv4i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv4i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -387,22 +257,9 @@ define @vsrl_vx_nxv8i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv8i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv8i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv8i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -424,22 +281,9 @@ define @vsrl_vx_nxv16i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv16i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector 
%head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv16i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv16i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -461,22 +305,9 @@ define @vsrl_vx_nxv32i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv32i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv32i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv32i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -506,19 +337,6 @@ define @vsrl_vx_nxv1i32_0( %va) { ret %vc } -define @vsrl_vx_nxv1i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv1i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv2i32( %va, i32 %b) { ; CHECK-LABEL: vsrl_vx_nxv2i32: ; CHECK: # %bb.0: @@ -543,19 +361,6 @@ define @vsrl_vx_nxv2i32_0( %va) { ret %vc } -define @vsrl_vx_nxv2i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv2i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; 
CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv4i32( %va, i32 %b) { ; CHECK-LABEL: vsrl_vx_nxv4i32: ; CHECK: # %bb.0: @@ -580,19 +385,6 @@ define @vsrl_vx_nxv4i32_0( %va) { ret %vc } -define @vsrl_vx_nxv4i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv4i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv8i32( %va, i32 %b) { ; CHECK-LABEL: vsrl_vx_nxv8i32: ; CHECK: # %bb.0: @@ -617,19 +409,6 @@ define @vsrl_vx_nxv8i32_0( %va) { ret %vc } -define @vsrl_vx_nxv8i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv8i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv16i32( %va, i32 %b) { ; CHECK-LABEL: vsrl_vx_nxv16i32: ; CHECK: # %bb.0: @@ -654,19 +433,6 @@ define @vsrl_vx_nxv16i32_0( %va) { ret %vc } -define @vsrl_vx_nxv16i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv16i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vsrl_vx_nxv1i64: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv64.ll 
index d1aa31746f8b41..0a5157d95523a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv64.ll @@ -17,22 +17,9 @@ define @vsrl_vx_nxv1i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv1i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv1i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv1i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -54,22 +41,9 @@ define @vsrl_vx_nxv2i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv2i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv2i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv2i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -91,22 +65,9 @@ define @vsrl_vx_nxv4i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv4i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector 
%head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv4i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv4i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -128,22 +89,9 @@ define @vsrl_vx_nxv8i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv8i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv8i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv8i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -165,22 +113,9 @@ define @vsrl_vx_nxv16i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv16i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv16i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv16i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret 
%vc @@ -202,22 +137,9 @@ define @vsrl_vx_nxv32i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv32i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv32i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv32i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -239,22 +161,9 @@ define @vsrl_vx_nxv64i8_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv64i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i8 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv64i8_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv64i8_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i8 32, i32 0 + %head = insertelement undef, i8 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -276,22 +185,9 @@ define @vsrl_vx_nxv1i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv1i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv1i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv1i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: 
addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -313,22 +209,9 @@ define @vsrl_vx_nxv2i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv2i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv2i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv2i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -350,22 +233,9 @@ define @vsrl_vx_nxv4i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv4i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv4i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv4i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -387,22 +257,9 @@ define @vsrl_vx_nxv8i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv8i16_0: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv8i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv8i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -424,22 +281,9 @@ define @vsrl_vx_nxv16i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv16i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 -; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv16i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv16i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -461,22 +305,9 @@ define @vsrl_vx_nxv32i16_0( %va) { ; CHECK-LABEL: vsrl_vx_nxv32i16_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu -; CHECK-NEXT: vsrl.vi v16, v16, 31 +; CHECK-NEXT: vsrl.vi v16, v16, 6 ; CHECK-NEXT: ret - %head = insertelement undef, i16 31, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - -define @vsrl_vx_nxv32i16_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv32i16_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, 
e16,m8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i16 32, i32 0 + %head = insertelement undef, i16 6, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = lshr %va, %splat ret %vc @@ -506,19 +337,6 @@ define @vsrl_vx_nxv1i32_0( %va) { ret %vc } -define @vsrl_vx_nxv1i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv1i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv2i32( %va, i32 signext %b) { ; CHECK-LABEL: vsrl_vx_nxv2i32: ; CHECK: # %bb.0: @@ -543,19 +361,6 @@ define @vsrl_vx_nxv2i32_0( %va) { ret %vc } -define @vsrl_vx_nxv2i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv2i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv4i32( %va, i32 signext %b) { ; CHECK-LABEL: vsrl_vx_nxv4i32: ; CHECK: # %bb.0: @@ -580,19 +385,6 @@ define @vsrl_vx_nxv4i32_0( %va) { ret %vc } -define @vsrl_vx_nxv4i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv4i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv8i32( %va, i32 signext %b) { ; CHECK-LABEL: vsrl_vx_nxv8i32: ; CHECK: # %bb.0: @@ -617,19 +409,6 @@ define @vsrl_vx_nxv8i32_0( %va) { ret %vc } -define @vsrl_vx_nxv8i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv8i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, 
zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv16i32( %va, i32 signext %b) { ; CHECK-LABEL: vsrl_vx_nxv16i32: ; CHECK: # %bb.0: @@ -654,19 +433,6 @@ define @vsrl_vx_nxv16i32_0( %va) { ret %vc } -define @vsrl_vx_nxv16i32_1( %va) { -; CHECK-LABEL: vsrl_vx_nxv16i32_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v16, a0 -; CHECK-NEXT: ret - %head = insertelement undef, i32 32, i32 0 - %splat = shufflevector %head, undef, zeroinitializer - %vc = lshr %va, %splat - ret %vc -} - define @vsrl_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vsrl_vx_nxv1i64: ; CHECK: # %bb.0: From a5212b5c91cc699052125b8a3428ffe0c123837d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 12 Jan 2021 11:50:09 +0000 Subject: [PATCH 19/86] [X86][SSE] combineSubToSubus - remove SSE2 early-out. SSE2 truncation codegen has improved over the past few years (mainly due to better shuffle lowering/combining and computeKnownBits) - its no longer necessary to early-out from v8i32/v8i64 truncations. This was noticed while looking at completing PR40111 and moving combineSubToSubus to DAGCombine entirely. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +- llvm/test/CodeGen/X86/psubus.ll | 293 +++++++++++------------- 2 files changed, 138 insertions(+), 168 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2f9de876a87fa3..e3a94f1c23ab6e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48754,11 +48754,10 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, if (!VT.isVector()) return SDValue(); - // PSUBUS is supported, starting from SSE2, but truncation for v8i32 - // is only worth it with SSSE3 (PSHUFB). + // PSUBUS is supported, starting from SSE2. EVT EltVT = VT.getVectorElementType(); - if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16)) && - !(Subtarget.hasSSSE3() && (VT == MVT::v8i32 || VT == MVT::v8i64)) && + if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16 || + VT == MVT::v8i32 || VT == MVT::v8i64)) && !(Subtarget.useBWIRegs() && (VT == MVT::v16i32))) return SDValue(); @@ -48795,8 +48794,8 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SDValue MinLHS = Op1.getOperand(0).getOperand(0); SDValue MinRHS = Op1.getOperand(0).getOperand(1); EVT TruncVT = Op1.getOperand(0).getValueType(); - if (!(Subtarget.hasSSSE3() && (TruncVT == MVT::v8i32 || - TruncVT == MVT::v8i64)) && + if (!(Subtarget.hasSSE2() && + (TruncVT == MVT::v8i32 || TruncVT == MVT::v8i64)) && !(Subtarget.useBWIRegs() && (TruncVT == MVT::v16i32))) return SDValue(); SDValue OpToSaturate; @@ -48835,7 +48834,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, // values, or first 48 bits for 64 bit values. 
KnownBits Known = DAG.computeKnownBits(SubusLHS); unsigned NumZeros = Known.countMinLeadingZeros(); - if ((VT == MVT::v8i64 && NumZeros < 48) || NumZeros < 16) + if (NumZeros < (VT.getScalarSizeInBits() - 16)) return SDValue(); EVT ExtType = SubusLHS.getValueType(); diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll index 92283dba25b81a..906af5e17211af 100644 --- a/llvm/test/CodeGen/X86/psubus.ll +++ b/llvm/test/CodeGen/X86/psubus.ll @@ -1382,33 +1382,32 @@ vector.ph: define <8 x i16> @psubus_8i32_max(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSE2-LABEL: psubus_8i32_max: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm2, %xmm6 -; SSE2-NEXT: pxor %xmm5, %xmm6 -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: por %xmm5, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm6, %xmm4 -; SSE2-NEXT: pand %xmm4, %xmm3 -; SSE2-NEXT: pandn %xmm2, %xmm4 -; SSE2-NEXT: por %xmm3, %xmm4 -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: pxor %xmm5, %xmm3 -; SSE2-NEXT: por %xmm0, %xmm5 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 +; SSE2-NEXT: pand %xmm6, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm6 +; SSE2-NEXT: por %xmm2, %xmm6 +; SSE2-NEXT: pslld $16, %xmm6 +; SSE2-NEXT: psrad $16, %xmm6 +; SSE2-NEXT: pxor %xmm1, %xmm3 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 -; SSE2-NEXT: pand %xmm5, %xmm0 -; SSE2-NEXT: pandn %xmm1, %xmm5 -; SSE2-NEXT: por %xmm5, 
%xmm0 -; SSE2-NEXT: psubd %xmm1, %xmm0 -; SSE2-NEXT: psubd %xmm2, %xmm4 -; SSE2-NEXT: pslld $16, %xmm4 -; SSE2-NEXT: psrad $16, %xmm4 -; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: pxor %xmm5, %xmm4 +; SSE2-NEXT: pand %xmm1, %xmm5 +; SSE2-NEXT: por %xmm4, %xmm5 +; SSE2-NEXT: pslld $16, %xmm5 +; SSE2-NEXT: psrad $16, %xmm5 +; SSE2-NEXT: packssdw %xmm6, %xmm5 +; SSE2-NEXT: psubusw %xmm5, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm4, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_8i32_max: @@ -1483,91 +1482,72 @@ vector.ph: define <8 x i16> @psubus_8i64_max(<8 x i16> %x, <8 x i64> %y) nounwind { ; SSE2-LABEL: psubus_8i64_max: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: pxor %xmm5, %xmm5 -; SSE2-NEXT: movdqa %xmm0, %xmm10 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm5[4],xmm10[5],xmm5[5],xmm10[6],xmm5[6],xmm10[7],xmm5[7] -; SSE2-NEXT: movdqa %xmm10, %xmm8 -; SSE2-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm5[0],xmm8[1],xmm5[1] -; SSE2-NEXT: punpckhdq {{.*#+}} xmm10 = xmm10[2],xmm5[2],xmm10[3],xmm5[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3] -; SSE2-NEXT: movdqa %xmm0, %xmm9 -; SSE2-NEXT: punpckldq {{.*#+}} xmm9 = xmm9[0],xmm5[0],xmm9[1],xmm5[1] -; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm5[2],xmm0[3],xmm5[3] -; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [9223372039002259456,9223372039002259456] -; SSE2-NEXT: movdqa %xmm2, %xmm6 -; SSE2-NEXT: pxor %xmm11, %xmm6 -; SSE2-NEXT: movdqa %xmm0, %xmm7 -; SSE2-NEXT: por %xmm11, %xmm7 -; SSE2-NEXT: movdqa %xmm7, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm6, %xmm5 -; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm5[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm6, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] -; SSE2-NEXT: pand %xmm12, 
%xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm5[1,1,3,3] -; SSE2-NEXT: por %xmm7, %xmm13 -; SSE2-NEXT: pand %xmm13, %xmm0 -; SSE2-NEXT: pandn %xmm2, %xmm13 -; SSE2-NEXT: por %xmm0, %xmm13 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm11, %xmm0 -; SSE2-NEXT: movdqa %xmm9, %xmm5 -; SSE2-NEXT: por %xmm11, %xmm5 -; SSE2-NEXT: movdqa %xmm5, %xmm7 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm7[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm0, %xmm5 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] -; SSE2-NEXT: pand %xmm12, %xmm5 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3] -; SSE2-NEXT: por %xmm5, %xmm0 -; SSE2-NEXT: pand %xmm0, %xmm9 -; SSE2-NEXT: pandn %xmm1, %xmm0 -; SSE2-NEXT: por %xmm9, %xmm0 -; SSE2-NEXT: movdqa %xmm4, %xmm5 -; SSE2-NEXT: pxor %xmm11, %xmm5 -; SSE2-NEXT: movdqa %xmm10, %xmm7 -; SSE2-NEXT: por %xmm11, %xmm7 -; SSE2-NEXT: movdqa %xmm7, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm5, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] -; SSE2-NEXT: pand %xmm9, %xmm5 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3] -; SSE2-NEXT: por %xmm5, %xmm7 -; SSE2-NEXT: pand %xmm7, %xmm10 -; SSE2-NEXT: pandn %xmm4, %xmm7 -; SSE2-NEXT: por %xmm10, %xmm7 -; SSE2-NEXT: movdqa %xmm3, %xmm5 -; SSE2-NEXT: pxor %xmm11, %xmm5 -; SSE2-NEXT: por %xmm8, %xmm11 -; SSE2-NEXT: movdqa %xmm11, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] +; SSE2-NEXT: movdqa %xmm2, %xmm7 +; SSE2-NEXT: pxor %xmm5, %xmm7 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002324991,9223372039002324991] +; SSE2-NEXT: movdqa %xmm8, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm7, %xmm6 ; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm5, %xmm11 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm11[1,1,3,3] -; SSE2-NEXT: pand %xmm9, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm8, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = 
xmm7[1,1,3,3] +; SSE2-NEXT: pand %xmm9, %xmm7 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] -; SSE2-NEXT: por %xmm5, %xmm6 -; SSE2-NEXT: pand %xmm6, %xmm8 -; SSE2-NEXT: pandn %xmm3, %xmm6 -; SSE2-NEXT: por %xmm8, %xmm6 -; SSE2-NEXT: psubq %xmm3, %xmm6 -; SSE2-NEXT: psubq %xmm4, %xmm7 -; SSE2-NEXT: psubq %xmm1, %xmm0 -; SSE2-NEXT: psubq %xmm2, %xmm13 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm13[0,2,2,3] +; SSE2-NEXT: por %xmm7, %xmm6 +; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [65535,65535] +; SSE2-NEXT: pand %xmm6, %xmm2 +; SSE2-NEXT: pandn %xmm9, %xmm6 +; SSE2-NEXT: por %xmm2, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm10 = xmm2[0,2,2,3,4,5,6,7] +; SSE2-NEXT: movdqa %xmm1, %xmm6 +; SSE2-NEXT: pxor %xmm5, %xmm6 +; SSE2-NEXT: movdqa %xmm8, %xmm7 +; SSE2-NEXT: pcmpgtd %xmm6, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm8, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSE2-NEXT: pand %xmm2, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3] +; SSE2-NEXT: por %xmm6, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pandn %xmm9, %xmm2 +; SSE2-NEXT: por %xmm1, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,1,0,2,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1] +; SSE2-NEXT: movdqa %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm5, %xmm2 +; SSE2-NEXT: movdqa %xmm8, %xmm6 +; 
SSE2-NEXT: pcmpgtd %xmm2, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm8, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm4 +; SSE2-NEXT: pandn %xmm9, %xmm6 +; SSE2-NEXT: por %xmm4, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7] +; SSE2-NEXT: pxor %xmm3, %xmm5 +; SSE2-NEXT: movdqa %xmm8, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm8, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm5, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pandn %xmm9, %xmm4 +; SSE2-NEXT: por %xmm3, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] +; SSE2-NEXT: psubusw %xmm3, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_8i64_max: @@ -1943,35 +1923,32 @@ vector.ph: define <8 x i16> @psubus_i16_i32_max_swapped(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSE2-LABEL: psubus_i16_i32_max_swapped: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: pxor %xmm3, %xmm3 -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pxor %xmm5, %xmm3 -; SSE2-NEXT: movdqa %xmm0, %xmm6 -; SSE2-NEXT: por %xmm5, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm6, %xmm3 -; SSE2-NEXT: movdqa 
%xmm2, %xmm6 -; SSE2-NEXT: pand %xmm3, %xmm6 -; SSE2-NEXT: pandn %xmm0, %xmm3 -; SSE2-NEXT: por %xmm6, %xmm3 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm5, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 +; SSE2-NEXT: pand %xmm6, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm6 +; SSE2-NEXT: por %xmm2, %xmm6 +; SSE2-NEXT: pslld $16, %xmm6 +; SSE2-NEXT: psrad $16, %xmm6 +; SSE2-NEXT: pxor %xmm1, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 +; SSE2-NEXT: pxor %xmm5, %xmm4 +; SSE2-NEXT: pand %xmm1, %xmm5 ; SSE2-NEXT: por %xmm4, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm5, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: pand %xmm0, %xmm5 -; SSE2-NEXT: pandn %xmm4, %xmm0 -; SSE2-NEXT: por %xmm5, %xmm0 -; SSE2-NEXT: psubd %xmm1, %xmm0 -; SSE2-NEXT: psubd %xmm2, %xmm3 -; SSE2-NEXT: pslld $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: pslld $16, %xmm5 +; SSE2-NEXT: psrad $16, %xmm5 +; SSE2-NEXT: packssdw %xmm6, %xmm5 +; SSE2-NEXT: psubusw %xmm5, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm3, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_i16_i32_max_swapped: @@ -2046,33 +2023,27 @@ vector.ph: define <8 x i16> @psubus_i16_i32_min(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSE2-LABEL: psubus_i16_i32_min: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: pxor %xmm3, %xmm3 -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; 
SSE2-NEXT: movdqa %xmm0, %xmm5 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] -; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: pxor %xmm6, %xmm3 -; SSE2-NEXT: movdqa %xmm5, %xmm7 -; SSE2-NEXT: por %xmm6, %xmm7 -; SSE2-NEXT: pcmpgtd %xmm7, %xmm3 -; SSE2-NEXT: pand %xmm3, %xmm5 -; SSE2-NEXT: pandn %xmm1, %xmm3 -; SSE2-NEXT: por %xmm5, %xmm3 -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: pxor %xmm6, %xmm1 -; SSE2-NEXT: por %xmm4, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm6, %xmm1 -; SSE2-NEXT: pand %xmm1, %xmm4 -; SSE2-NEXT: pandn %xmm2, %xmm1 -; SSE2-NEXT: por %xmm4, %xmm1 -; SSE2-NEXT: pslld $16, %xmm1 -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: pslld $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: packssdw %xmm1, %xmm3 -; SSE2-NEXT: psubw %xmm3, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 +; SSE2-NEXT: pand %xmm6, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm6 +; SSE2-NEXT: por %xmm2, %xmm6 +; SSE2-NEXT: pslld $16, %xmm6 +; SSE2-NEXT: psrad $16, %xmm6 +; SSE2-NEXT: pxor %xmm1, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 +; SSE2-NEXT: pxor %xmm5, %xmm4 +; SSE2-NEXT: pand %xmm1, %xmm5 +; SSE2-NEXT: por %xmm4, %xmm5 +; SSE2-NEXT: pslld $16, %xmm5 +; SSE2-NEXT: psrad $16, %xmm5 +; SSE2-NEXT: packssdw %xmm6, %xmm5 +; SSE2-NEXT: psubusw %xmm5, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_i16_i32_min: From c4944a6f53f6d1876e76563599f5f149328e7f8f Mon Sep 17 00:00:00 2001 From: Bevin Hansson Date: Mon, 11 Jan 2021 22:46:42 +0100 Subject: [PATCH 20/86] [Fixed Point] Add codegen for conversion between fixed-point and floating point. 
The patch adds the required methods to FixedPointBuilder for converting between fixed-point and floating point, and uses them from Clang. This depends on D54749. Reviewed By: leonardchan Differential Revision: https://reviews.llvm.org/D86632 --- clang/lib/CodeGen/CGExprScalar.cpp | 37 ++- clang/test/Frontend/fixed_point_compound.c | 110 +++++++ clang/test/Frontend/fixed_point_conversions.c | 299 +++++++++++++++++ .../Frontend/fixed_point_conversions_half.c | 309 ++++++++++++++++++ llvm/include/llvm/IR/FixedPointBuilder.h | 59 ++++ 5 files changed, 801 insertions(+), 13 deletions(-) create mode 100644 clang/test/Frontend/fixed_point_conversions_half.c diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index d6d5ec544c0873..6f7e8263fa1013 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -1212,13 +1212,14 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // padding is enabled because overflow into this bit is undefined // behavior. return Builder.CreateIsNotNull(Src, "tobool"); - if (DstType->isFixedPointType() || DstType->isIntegerType()) + if (DstType->isFixedPointType() || DstType->isIntegerType() || + DstType->isRealFloatingType()) return EmitFixedPointConversion(Src, SrcType, DstType, Loc); llvm_unreachable( "Unhandled scalar conversion from a fixed point type to another type."); } else if (DstType->isFixedPointType()) { - if (SrcType->isIntegerType()) + if (SrcType->isIntegerType() || SrcType->isRealFloatingType()) // This also includes converting booleans and enums to fixed point types. 
return EmitFixedPointConversion(Src, SrcType, DstType, Loc); @@ -1434,19 +1435,29 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc) { - auto SrcFPSema = CGF.getContext().getFixedPointSemantics(SrcTy); - auto DstFPSema = CGF.getContext().getFixedPointSemantics(DstTy); llvm::FixedPointBuilder FPBuilder(Builder); llvm::Value *Result; - if (DstTy->isIntegerType()) - Result = FPBuilder.CreateFixedToInteger(Src, SrcFPSema, - DstFPSema.getWidth(), - DstFPSema.isSigned()); - else if (SrcTy->isIntegerType()) - Result = FPBuilder.CreateIntegerToFixed(Src, SrcFPSema.isSigned(), - DstFPSema); - else - Result = FPBuilder.CreateFixedToFixed(Src, SrcFPSema, DstFPSema); + if (SrcTy->isRealFloatingType()) + Result = FPBuilder.CreateFloatingToFixed(Src, + CGF.getContext().getFixedPointSemantics(DstTy)); + else if (DstTy->isRealFloatingType()) + Result = FPBuilder.CreateFixedToFloating(Src, + CGF.getContext().getFixedPointSemantics(SrcTy), + ConvertType(DstTy)); + else { + auto SrcFPSema = CGF.getContext().getFixedPointSemantics(SrcTy); + auto DstFPSema = CGF.getContext().getFixedPointSemantics(DstTy); + + if (DstTy->isIntegerType()) + Result = FPBuilder.CreateFixedToInteger(Src, SrcFPSema, + DstFPSema.getWidth(), + DstFPSema.isSigned()); + else if (SrcTy->isIntegerType()) + Result = FPBuilder.CreateIntegerToFixed(Src, SrcFPSema.isSigned(), + DstFPSema); + else + Result = FPBuilder.CreateFixedToFixed(Src, SrcFPSema, DstFPSema); + } return Result; } diff --git a/clang/test/Frontend/fixed_point_compound.c b/clang/test/Frontend/fixed_point_compound.c index 897ba2e22636d5..5dcc7fba0da709 100644 --- a/clang/test/Frontend/fixed_point_compound.c +++ b/clang/test/Frontend/fixed_point_compound.c @@ -16,6 +16,8 @@ int i; unsigned int u; signed char c; +float fl; + // CHECK-LABEL: @add_shfa( // CHECK-NEXT: entry: @@ -358,6 +360,66 @@ void add_sshsuf() 
{ sshf += suf; } +// CHECK-LABEL: @add_afl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], 0x3F00000000000000 +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP0]] +// CHECK-NEXT: [[TMP4:%.*]] = fmul float [[ADD]], 3.276800e+04 +// CHECK-NEXT: [[TMP5:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK-NEXT: store i32 [[TMP5]], i32* @a, align 4 +// CHECK-NEXT: ret void +// +void add_afl() { + a += fl; +} + +// CHECK-LABEL: @add_fla( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3F00000000000000 +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]] +// CHECK-NEXT: store float [[ADD]], float* @fl, align 4 +// CHECK-NEXT: ret void +// +void add_fla() { + fl += a; +} + +// CHECK-LABEL: @add_safl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @sa, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], 0x3F00000000000000 +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP0]] +// CHECK-NEXT: [[TMP4:%.*]] = fmul float [[ADD]], 3.276800e+04 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[TMP4]]) +// CHECK-NEXT: store i32 [[TMP5]], i32* @sa, align 4 +// CHECK-NEXT: ret void +// +void add_safl() { + sa += fl; +} + +// CHECK-LABEL: @add_flsa( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @sa, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3F00000000000000 +// CHECK-NEXT: [[TMP3:%.*]] 
= load float, float* @fl, align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]] +// CHECK-NEXT: store float [[ADD]], float* @fl, align 4 +// CHECK-NEXT: ret void +// +void add_flsa() { + fl += sa; +} + // Subtraction, multiplication and division should work about the same, so // just make sure we can do them. @@ -429,6 +491,22 @@ void sub_csa() { c -= sa; } +// CHECK-LABEL: @sub_afl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], 0x3F00000000000000 +// CHECK-NEXT: [[SUB:%.*]] = fsub float [[TMP3]], [[TMP0]] +// CHECK-NEXT: [[TMP4:%.*]] = fmul float [[SUB]], 3.276800e+04 +// CHECK-NEXT: [[TMP5:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK-NEXT: store i32 [[TMP5]], i32* @a, align 4 +// CHECK-NEXT: ret void +// +void sub_afl() { + a -= fl; +} + // SIGNED-LABEL: @mul_auf( // SIGNED-NEXT: entry: @@ -498,6 +576,22 @@ void mul_csa() { c *= sa; } +// CHECK-LABEL: @mul_afl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], 0x3F00000000000000 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], [[TMP0]] +// CHECK-NEXT: [[TMP4:%.*]] = fmul float [[MUL]], 3.276800e+04 +// CHECK-NEXT: [[TMP5:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK-NEXT: store i32 [[TMP5]], i32* @a, align 4 +// CHECK-NEXT: ret void +// +void mul_afl() { + a *= fl; +} + // SIGNED-LABEL: @div_auf( // SIGNED-NEXT: entry: @@ -567,6 +661,22 @@ void div_csa() { c /= sa; } +// CHECK-LABEL: @div_afl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] 
to float +// CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], 0x3F00000000000000 +// CHECK-NEXT: [[DIV:%.*]] = fdiv float [[TMP3]], [[TMP0]] +// CHECK-NEXT: [[TMP4:%.*]] = fmul float [[DIV]], 3.276800e+04 +// CHECK-NEXT: [[TMP5:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK-NEXT: store i32 [[TMP5]], i32* @a, align 4 +// CHECK-NEXT: ret void +// +void div_afl() { + a /= fl; +} + // CHECK-LABEL: @shft_ai( // CHECK-NEXT: entry: diff --git a/clang/test/Frontend/fixed_point_conversions.c b/clang/test/Frontend/fixed_point_conversions.c index dfe727c708f417..96bc352d94dc6b 100644 --- a/clang/test/Frontend/fixed_point_conversions.c +++ b/clang/test/Frontend/fixed_point_conversions.c @@ -26,11 +26,15 @@ _Sat unsigned long _Accum sat_ula; _Sat short _Fract sat_sf; _Sat _Fract sat_f; _Sat long _Fract sat_lf; +_Sat unsigned _Fract sat_uf; short s; int i; unsigned int ui; +float fl; +double d; + // CHECK-LABEL: @fix_same1( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4 @@ -695,3 +699,298 @@ void int_sat3() { void int_sat4() { sat_usa = ui; } + + +// CHECK-LABEL: @float_fix1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 1.280000e+02 +// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i16 +// CHECK-NEXT: store i16 [[TMP2]], i16* @sa, align 2 +// CHECK-NEXT: ret void +// +void float_fix1() { + sa = fl; +} + +// CHECK-LABEL: @float_fix2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04 +// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i32 +// CHECK-NEXT: store i32 [[TMP2]], i32* @a, align 4 +// CHECK-NEXT: ret void +// +void float_fix2() { + a = fl; +} + +// CHECK-LABEL: @float_fix3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 0x41E0000000000000 +// 
CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i64 +// CHECK-NEXT: store i64 [[TMP2]], i64* @la, align 8 +// CHECK-NEXT: ret void +// +void float_fix3() { + la = fl; +} + +// CHECK-LABEL: @float_fix4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 1.280000e+02 +// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i8 +// CHECK-NEXT: store i8 [[TMP2]], i8* @sf, align 1 +// CHECK-NEXT: ret void +// +void float_fix4() { + sf = fl; +} + +// CHECK-LABEL: @float_fix5( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 0x41E0000000000000 +// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i32 +// CHECK-NEXT: store i32 [[TMP2]], i32* @lf, align 4 +// CHECK-NEXT: ret void +// +void float_fix5() { + lf = fl; +} + +// SIGNED-LABEL: @float_fix6( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// SIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 6.553600e+04 +// SIGNED-NEXT: [[TMP2:%.*]] = fptoui float [[TMP1]] to i32 +// SIGNED-NEXT: store i32 [[TMP2]], i32* @ua, align 4 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @float_fix6( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04 +// UNSIGNED-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i32 +// UNSIGNED-NEXT: store i32 [[TMP2]], i32* @ua, align 4 +// UNSIGNED-NEXT: ret void +// +void float_fix6() { + ua = fl; +} + +// SIGNED-LABEL: @float_fix7( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// SIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 6.553600e+04 +// SIGNED-NEXT: [[TMP2:%.*]] = fptoui float [[TMP1]] to i16 +// SIGNED-NEXT: store i16 [[TMP2]], i16* @uf, align 2 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @float_fix7( +// UNSIGNED-NEXT: 
entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04 +// UNSIGNED-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i16 +// UNSIGNED-NEXT: store i16 [[TMP2]], i16* @uf, align 2 +// UNSIGNED-NEXT: ret void +// +void float_fix7() { + uf = fl; +} + + +// CHECK-LABEL: @fix_float1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* @sa, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i16 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 7.812500e-03 +// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4 +// CHECK-NEXT: ret void +// +void fix_float1() { + fl = sa; +} + +// CHECK-LABEL: @fix_float2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3F00000000000000 +// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4 +// CHECK-NEXT: ret void +// +void fix_float2() { + fl = a; +} + +// CHECK-LABEL: @fix_float3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @la, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i64 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000 +// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4 +// CHECK-NEXT: ret void +// +void fix_float3() { + fl = la; +} + +// CHECK-LABEL: @fix_float4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* @sf, align 1 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i8 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 7.812500e-03 +// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4 +// CHECK-NEXT: ret void +// +void fix_float4() { + fl = sf; +} + +// CHECK-LABEL: @fix_float5( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @lf, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float 
[[TMP1]], 0x3E00000000000000 +// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4 +// CHECK-NEXT: ret void +// +void fix_float5() { + fl = lf; +} + +// SIGNED-LABEL: @fix_float6( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load i32, i32* @ua, align 4 +// SIGNED-NEXT: [[TMP1:%.*]] = uitofp i32 [[TMP0]] to float +// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3EF0000000000000 +// SIGNED-NEXT: store float [[TMP2]], float* @fl, align 4 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @fix_float6( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load i32, i32* @ua, align 4 +// UNSIGNED-NEXT: [[TMP1:%.*]] = uitofp i32 [[TMP0]] to float +// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3F00000000000000 +// UNSIGNED-NEXT: store float [[TMP2]], float* @fl, align 4 +// UNSIGNED-NEXT: ret void +// +void fix_float6() { + fl = ua; +} + +// SIGNED-LABEL: @fix_float7( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @uf, align 2 +// SIGNED-NEXT: [[TMP1:%.*]] = uitofp i16 [[TMP0]] to float +// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3EF0000000000000 +// SIGNED-NEXT: store float [[TMP2]], float* @fl, align 4 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @fix_float7( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @uf, align 2 +// UNSIGNED-NEXT: [[TMP1:%.*]] = uitofp i16 [[TMP0]] to float +// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3F00000000000000 +// UNSIGNED-NEXT: store float [[TMP2]], float* @fl, align 4 +// UNSIGNED-NEXT: ret void +// +void fix_float7() { + fl = uf; +} + + +// CHECK-LABEL: @float_sat1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 1.280000e+02 +// CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptosi.sat.i16.f32(float [[TMP1]]) +// CHECK-NEXT: store i16 [[TMP2]], i16* @sat_sa, align 2 +// CHECK-NEXT: ret void +// +void float_sat1() { + sat_sa = fl; +} + +// 
CHECK-LABEL: @float_sat2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[TMP1]]) +// CHECK-NEXT: store i32 [[TMP2]], i32* @sat_a, align 4 +// CHECK-NEXT: ret void +// +void float_sat2() { + sat_a = fl; +} + +// CHECK-LABEL: @float_sat3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 0x41E0000000000000 +// CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.fptosi.sat.i64.f32(float [[TMP1]]) +// CHECK-NEXT: store i64 [[TMP2]], i64* @sat_la, align 8 +// CHECK-NEXT: ret void +// +void float_sat3() { + sat_la = fl; +} + +// CHECK-LABEL: @float_sat4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 1.280000e+02 +// CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.fptosi.sat.i8.f32(float [[TMP1]]) +// CHECK-NEXT: store i8 [[TMP2]], i8* @sat_sf, align 1 +// CHECK-NEXT: ret void +// +void float_sat4() { + sat_sf = fl; +} + +// SIGNED-LABEL: @float_sat5( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// SIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 6.553600e+04 +// SIGNED-NEXT: [[TMP2:%.*]] = call i32 @llvm.fptoui.sat.i32.f32(float [[TMP1]]) +// SIGNED-NEXT: store i32 [[TMP2]], i32* @sat_ua, align 4 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @float_sat5( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04 +// UNSIGNED-NEXT: [[TMP2:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[TMP1]]) +// UNSIGNED-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 +// UNSIGNED-NEXT: [[SATMIN:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]] +// UNSIGNED-NEXT: store i32 [[SATMIN]], i32* @sat_ua, align 4 
+// UNSIGNED-NEXT: ret void +// +void float_sat5() { + sat_ua = fl; +} + +// SIGNED-LABEL: @float_sat6( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// SIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 6.553600e+04 +// SIGNED-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptoui.sat.i16.f32(float [[TMP1]]) +// SIGNED-NEXT: store i16 [[TMP2]], i16* @sat_uf, align 2 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @float_sat6( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4 +// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04 +// UNSIGNED-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptosi.sat.i16.f32(float [[TMP1]]) +// UNSIGNED-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 0 +// UNSIGNED-NEXT: [[SATMIN:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] +// UNSIGNED-NEXT: store i16 [[SATMIN]], i16* @sat_uf, align 2 +// UNSIGNED-NEXT: ret void +// +void float_sat6() { + sat_uf = fl; +} diff --git a/clang/test/Frontend/fixed_point_conversions_half.c b/clang/test/Frontend/fixed_point_conversions_half.c new file mode 100644 index 00000000000000..18261edf447409 --- /dev/null +++ b/clang/test/Frontend/fixed_point_conversions_half.c @@ -0,0 +1,309 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -ffixed-point -triple arm64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED +// RUN: %clang_cc1 -ffixed-point -triple arm64-unknown-linux-gnu -S -emit-llvm %s -o - -fpadding-on-unsigned-fixed-point | FileCheck %s --check-prefixes=CHECK,UNSIGNED + +short _Fract sf; +long _Fract lf; + +short _Accum sa; +long _Accum la; + +unsigned short _Accum usa; +unsigned long _Accum ula; + +_Sat short _Fract sf_sat; +_Sat long _Fract lf_sat; + +_Sat short _Accum sa_sat; +_Sat long _Accum la_sat; + +_Sat unsigned short _Accum usa_sat; +_Sat unsigned long _Accum ula_sat; + +_Float16 h; + + +// CHECK-LABEL: @half_fix1( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800 +// CHECK-NEXT: [[TMP2:%.*]] = fptosi half [[TMP1]] to i8 +// CHECK-NEXT: store i8 [[TMP2]], i8* @sf, align 1 +// CHECK-NEXT: ret void +// +void half_fix1() { + sf = h; +} + +// CHECK-LABEL: @half_fix2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000 +// CHECK-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i32 +// CHECK-NEXT: store i32 [[TMP3]], i32* @lf, align 4 +// CHECK-NEXT: ret void +// +void half_fix2() { + lf = h; +} + +// CHECK-LABEL: @half_fix3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800 +// CHECK-NEXT: [[TMP2:%.*]] = fptosi half [[TMP1]] to i16 +// CHECK-NEXT: store i16 [[TMP2]], i16* @sa, align 2 +// CHECK-NEXT: ret void +// +void half_fix3() { + sa = h; +} + +// CHECK-LABEL: @half_fix4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000 +// CHECK-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i64 +// CHECK-NEXT: store i64 [[TMP3]], i64* @la, align 8 +// CHECK-NEXT: ret void +// +void half_fix4() { + la = h; +} + +// SIGNED-LABEL: @half_fix5( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// SIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 2.560000e+02 +// SIGNED-NEXT: [[TMP3:%.*]] = fptoui float [[TMP2]] to i16 +// SIGNED-NEXT: store i16 [[TMP3]], i16* @usa, align 2 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @half_fix5( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 
+// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800 +// UNSIGNED-NEXT: [[TMP2:%.*]] = fptosi half [[TMP1]] to i16 +// UNSIGNED-NEXT: store i16 [[TMP2]], i16* @usa, align 2 +// UNSIGNED-NEXT: ret void +// +void half_fix5() { + usa = h; +} + +// SIGNED-LABEL: @half_fix6( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// SIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41F0000000000000 +// SIGNED-NEXT: [[TMP3:%.*]] = fptoui float [[TMP2]] to i64 +// SIGNED-NEXT: store i64 [[TMP3]], i64* @ula, align 8 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @half_fix6( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// UNSIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000 +// UNSIGNED-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i64 +// UNSIGNED-NEXT: store i64 [[TMP3]], i64* @ula, align 8 +// UNSIGNED-NEXT: ret void +// +void half_fix6() { + ula = h; +} + + +// CHECK-LABEL: @half_sat1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800 +// CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.fptosi.sat.i8.f16(half [[TMP1]]) +// CHECK-NEXT: store i8 [[TMP2]], i8* @sf_sat, align 1 +// CHECK-NEXT: ret void +// +void half_sat1() { + sf_sat = h; +} + +// CHECK-LABEL: @half_sat2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[TMP2]]) +// CHECK-NEXT: store i32 [[TMP3]], i32* @lf_sat, align 4 +// CHECK-NEXT: ret void +// +void half_sat2() { + lf_sat = h; +} + +// CHECK-LABEL: @half_sat3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = 
load half, half* @h, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800 +// CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptosi.sat.i16.f16(half [[TMP1]]) +// CHECK-NEXT: store i16 [[TMP2]], i16* @sa_sat, align 2 +// CHECK-NEXT: ret void +// +void half_sat3() { + sa_sat = h; +} + +// CHECK-LABEL: @half_sat4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.fptosi.sat.i64.f32(float [[TMP2]]) +// CHECK-NEXT: store i64 [[TMP3]], i64* @la_sat, align 8 +// CHECK-NEXT: ret void +// +void half_sat4() { + la_sat = h; +} + +// SIGNED-LABEL: @half_sat5( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// SIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 2.560000e+02 +// SIGNED-NEXT: [[TMP3:%.*]] = call i16 @llvm.fptoui.sat.i16.f32(float [[TMP2]]) +// SIGNED-NEXT: store i16 [[TMP3]], i16* @usa_sat, align 2 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @half_sat5( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800 +// UNSIGNED-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptosi.sat.i16.f16(half [[TMP1]]) +// UNSIGNED-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 0 +// UNSIGNED-NEXT: [[SATMIN:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] +// UNSIGNED-NEXT: store i16 [[SATMIN]], i16* @usa_sat, align 2 +// UNSIGNED-NEXT: ret void +// +void half_sat5() { + usa_sat = h; +} + +// SIGNED-LABEL: @half_sat6( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// SIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41F0000000000000 +// SIGNED-NEXT: [[TMP3:%.*]] = call i64 
@llvm.fptoui.sat.i64.f32(float [[TMP2]]) +// SIGNED-NEXT: store i64 [[TMP3]], i64* @ula_sat, align 8 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @half_sat6( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2 +// UNSIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float +// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000 +// UNSIGNED-NEXT: [[TMP3:%.*]] = call i64 @llvm.fptosi.sat.i64.f32(float [[TMP2]]) +// UNSIGNED-NEXT: [[TMP4:%.*]] = icmp slt i64 [[TMP3]], 0 +// UNSIGNED-NEXT: [[SATMIN:%.*]] = select i1 [[TMP4]], i64 0, i64 [[TMP3]] +// UNSIGNED-NEXT: store i64 [[SATMIN]], i64* @ula_sat, align 8 +// UNSIGNED-NEXT: ret void +// +void half_sat6() { + ula_sat = h; +} + + +// CHECK-LABEL: @fix_half1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* @sf, align 1 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i8 [[TMP0]] to half +// CHECK-NEXT: [[TMP2:%.*]] = fmul half [[TMP1]], 0xH2000 +// CHECK-NEXT: store half [[TMP2]], half* @h, align 2 +// CHECK-NEXT: ret void +// +void fix_half1() { + h = sf; +} + +// CHECK-LABEL: @fix_half2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @lf, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000 +// CHECK-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half +// CHECK-NEXT: store half [[TMP3]], half* @h, align 2 +// CHECK-NEXT: ret void +// +void fix_half2() { + h = lf; +} + +// CHECK-LABEL: @fix_half3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* @sa, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = sitofp i16 [[TMP0]] to half +// CHECK-NEXT: [[TMP2:%.*]] = fmul half [[TMP1]], 0xH2000 +// CHECK-NEXT: store half [[TMP2]], half* @h, align 2 +// CHECK-NEXT: ret void +// +void fix_half3() { + h = sa; +} + +// CHECK-LABEL: @fix_half4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @la, align 8 +// CHECK-NEXT: 
[[TMP1:%.*]] = sitofp i64 [[TMP0]] to float +// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000 +// CHECK-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half +// CHECK-NEXT: store half [[TMP3]], half* @h, align 2 +// CHECK-NEXT: ret void +// +void fix_half4() { + h = la; +} + +// SIGNED-LABEL: @fix_half5( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @usa, align 2 +// SIGNED-NEXT: [[TMP1:%.*]] = uitofp i16 [[TMP0]] to float +// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 3.906250e-03 +// SIGNED-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half +// SIGNED-NEXT: store half [[TMP3]], half* @h, align 2 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @fix_half5( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @usa, align 2 +// UNSIGNED-NEXT: [[TMP1:%.*]] = uitofp i16 [[TMP0]] to half +// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul half [[TMP1]], 0xH2000 +// UNSIGNED-NEXT: store half [[TMP2]], half* @h, align 2 +// UNSIGNED-NEXT: ret void +// +void fix_half5() { + h = usa; +} + +// SIGNED-LABEL: @fix_half6( +// SIGNED-NEXT: entry: +// SIGNED-NEXT: [[TMP0:%.*]] = load i64, i64* @ula, align 8 +// SIGNED-NEXT: [[TMP1:%.*]] = uitofp i64 [[TMP0]] to float +// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3DF0000000000000 +// SIGNED-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half +// SIGNED-NEXT: store half [[TMP3]], half* @h, align 2 +// SIGNED-NEXT: ret void +// +// UNSIGNED-LABEL: @fix_half6( +// UNSIGNED-NEXT: entry: +// UNSIGNED-NEXT: [[TMP0:%.*]] = load i64, i64* @ula, align 8 +// UNSIGNED-NEXT: [[TMP1:%.*]] = uitofp i64 [[TMP0]] to float +// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000 +// UNSIGNED-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half +// UNSIGNED-NEXT: store half [[TMP3]], half* @h, align 2 +// UNSIGNED-NEXT: ret void +// +void fix_half6() { + h = ula; +} diff --git a/llvm/include/llvm/IR/FixedPointBuilder.h b/llvm/include/llvm/IR/FixedPointBuilder.h 
index dcccdb7add3368..a99c761ad3e910 100644 --- a/llvm/include/llvm/IR/FixedPointBuilder.h +++ b/llvm/include/llvm/IR/FixedPointBuilder.h @@ -120,6 +120,16 @@ template class FixedPointBuilder { C.isSigned(), C.isSaturated(), BothPadded); } + /// Given a floating point type and a fixed-point semantic, return a floating + /// point type which can accommodate the fixed-point semantic. This is either + /// \p Ty, or a floating point type with a larger exponent than Ty. + Type *getAccommodatingFloatType(Type *Ty, const FixedPointSemantics &Sema) { + const fltSemantics *FloatSema = &Ty->getFltSemantics(); + while (!Sema.fitsInFloatSemantics(*FloatSema)) + FloatSema = APFixedPoint::promoteFloatSemantics(FloatSema); + return Type::getFloatingPointTy(Ty->getContext(), *FloatSema); + } + public: FixedPointBuilder(IRBuilderTy &Builder) : B(Builder) {} @@ -159,6 +169,55 @@ template class FixedPointBuilder { DstSema, false); } + Value *CreateFixedToFloating(Value *Src, const FixedPointSemantics &SrcSema, + Type *DstTy) { + Value *Result; + Type *OpTy = getAccommodatingFloatType(DstTy, SrcSema); + // Convert the raw fixed-point value directly to floating point. If the + // value is too large to fit, it will be rounded, not truncated. + Result = SrcSema.isSigned() ? B.CreateSIToFP(Src, OpTy) + : B.CreateUIToFP(Src, OpTy); + // Rescale the integral-in-floating point by the scaling factor. This is + // lossless, except for overflow to infinity which is unlikely. 
+ Result = B.CreateFMul(Result, + ConstantFP::get(OpTy, std::pow(2, -(int)SrcSema.getScale()))); + if (OpTy != DstTy) + Result = B.CreateFPTrunc(Result, DstTy); + return Result; + } + + Value *CreateFloatingToFixed(Value *Src, const FixedPointSemantics &DstSema) { + bool UseSigned = DstSema.isSigned() || DstSema.hasUnsignedPadding(); + Value *Result = Src; + Type *OpTy = getAccommodatingFloatType(Src->getType(), DstSema); + if (OpTy != Src->getType()) + Result = B.CreateFPExt(Result, OpTy); + // Rescale the floating point value so that its significant bits (for the + // purposes of the conversion) are in the integral range. + Result = B.CreateFMul(Result, + ConstantFP::get(OpTy, std::pow(2, DstSema.getScale()))); + + Type *ResultTy = B.getIntNTy(DstSema.getWidth()); + if (DstSema.isSaturated()) { + Intrinsic::ID IID = + UseSigned ? Intrinsic::fptosi_sat : Intrinsic::fptoui_sat; + Result = B.CreateIntrinsic(IID, {ResultTy, OpTy}, {Result}); + } else { + Result = UseSigned ? B.CreateFPToSI(Result, ResultTy) + : B.CreateFPToUI(Result, ResultTy); + } + + // When saturating unsigned-with-padding using signed operations, we may + // get negative values. Emit an extra clamp to zero. + if (DstSema.isSaturated() && DstSema.hasUnsignedPadding()) { + Constant *Zero = Constant::getNullValue(Result->getType()); + Result = + B.CreateSelect(B.CreateICmpSLT(Result, Zero), Zero, Result, "satmin"); + } + + return Result; + } + /// Add two fixed-point values and return the result in their common semantic. /// \p LHS - The left hand side /// \p LHSSema - The semantic of the left hand side From 7e44208115b35ad34cc10259e9c375abbd636ef5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 12 Jan 2021 13:43:56 +0000 Subject: [PATCH 21/86] [X86][SSE] combineSubToSubus - add v16i32 handling on pre-AVX512BW targets. v16i32 -> v16i16/v8i16 truncation is now good enough using PACKSS/PACKUS + shuffle combining that its no longer necessary to early-out on pre-AVX512BW targets. 
This was noticed while looking at completing PR40111 and moving combineSubToSubus to DAGCombine entirely. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +- llvm/test/CodeGen/X86/psubus.ll | 239 +++++++++++------------- 2 files changed, 114 insertions(+), 135 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e3a94f1c23ab6e..750c809eafca91 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48756,9 +48756,9 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, // PSUBUS is supported, starting from SSE2. EVT EltVT = VT.getVectorElementType(); - if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16 || - VT == MVT::v8i32 || VT == MVT::v8i64)) && - !(Subtarget.useBWIRegs() && (VT == MVT::v16i32))) + if (!(Subtarget.hasSSE2() && + (EltVT == MVT::i8 || EltVT == MVT::i16 || VT == MVT::v8i32 || + VT == MVT::v8i64 || VT == MVT::v16i32))) return SDValue(); SDValue SubusLHS, SubusRHS; @@ -48795,8 +48795,8 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SDValue MinRHS = Op1.getOperand(0).getOperand(1); EVT TruncVT = Op1.getOperand(0).getValueType(); if (!(Subtarget.hasSSE2() && - (TruncVT == MVT::v8i32 || TruncVT == MVT::v8i64)) && - !(Subtarget.useBWIRegs() && (TruncVT == MVT::v16i32))) + (TruncVT == MVT::v8i32 || TruncVT == MVT::v8i64 || + TruncVT == MVT::v16i32))) return SDValue(); SDValue OpToSaturate; if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll index 906af5e17211af..06240cd8bad382 100644 --- a/llvm/test/CodeGen/X86/psubus.ll +++ b/llvm/test/CodeGen/X86/psubus.ll @@ -1737,141 +1737,125 @@ vector.ph: define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind { ; SSE2-LABEL: psubus_16i32_max: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: movdqa %xmm1, %xmm8 -; SSE2-NEXT: pxor %xmm7, %xmm7 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 
= xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7] -; SSE2-NEXT: movdqa %xmm0, %xmm10 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1],xmm10[2],xmm7[2],xmm10[3],xmm7[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7] -; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm3, %xmm6 -; SSE2-NEXT: pxor %xmm7, %xmm6 -; SSE2-NEXT: movdqa %xmm0, %xmm9 -; SSE2-NEXT: por %xmm7, %xmm9 -; SSE2-NEXT: pcmpgtd %xmm6, %xmm9 -; SSE2-NEXT: pand %xmm9, %xmm0 -; SSE2-NEXT: pandn %xmm3, %xmm9 -; SSE2-NEXT: por %xmm0, %xmm9 -; SSE2-NEXT: movdqa %xmm2, %xmm6 -; SSE2-NEXT: pxor %xmm7, %xmm6 -; SSE2-NEXT: movdqa %xmm10, %xmm0 -; SSE2-NEXT: por %xmm7, %xmm0 -; SSE2-NEXT: pcmpgtd %xmm6, %xmm0 -; SSE2-NEXT: pand %xmm0, %xmm10 -; SSE2-NEXT: pandn %xmm2, %xmm0 -; SSE2-NEXT: por %xmm10, %xmm0 -; SSE2-NEXT: movdqa %xmm5, %xmm10 -; SSE2-NEXT: pxor %xmm7, %xmm10 -; SSE2-NEXT: movdqa %xmm8, %xmm6 -; SSE2-NEXT: por %xmm7, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm10, %xmm6 -; SSE2-NEXT: pand %xmm6, %xmm8 -; SSE2-NEXT: pandn %xmm5, %xmm6 -; SSE2-NEXT: por %xmm8, %xmm6 -; SSE2-NEXT: movdqa %xmm4, %xmm8 -; SSE2-NEXT: pxor %xmm7, %xmm8 -; SSE2-NEXT: por %xmm1, %xmm7 -; SSE2-NEXT: pcmpgtd %xmm8, %xmm7 -; SSE2-NEXT: pand %xmm7, %xmm1 -; SSE2-NEXT: pandn %xmm4, %xmm7 -; SSE2-NEXT: por %xmm7, %xmm1 -; SSE2-NEXT: psubd %xmm4, %xmm1 -; SSE2-NEXT: psubd %xmm5, %xmm6 -; SSE2-NEXT: psubd %xmm2, %xmm0 -; SSE2-NEXT: psubd %xmm3, %xmm9 -; SSE2-NEXT: pslld $16, %xmm9 -; SSE2-NEXT: psrad $16, %xmm9 -; SSE2-NEXT: pslld $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm9, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm3, %xmm8 +; SSE2-NEXT: pxor %xmm9, %xmm8 +; SSE2-NEXT: movdqa {{.*#+}} xmm7 = 
[2147549183,2147549183,2147549183,2147549183] +; SSE2-NEXT: movdqa %xmm7, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm8, %xmm6 +; SSE2-NEXT: pcmpeqd %xmm8, %xmm8 +; SSE2-NEXT: pand %xmm6, %xmm3 +; SSE2-NEXT: pxor %xmm8, %xmm6 +; SSE2-NEXT: por %xmm3, %xmm6 ; SSE2-NEXT: pslld $16, %xmm6 ; SSE2-NEXT: psrad $16, %xmm6 -; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: movdqa %xmm2, %xmm10 +; SSE2-NEXT: pxor %xmm9, %xmm10 +; SSE2-NEXT: movdqa %xmm7, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm10, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: pxor %xmm8, %xmm3 +; SSE2-NEXT: por %xmm2, %xmm3 +; SSE2-NEXT: pslld $16, %xmm3 +; SSE2-NEXT: psrad $16, %xmm3 +; SSE2-NEXT: packssdw %xmm6, %xmm3 +; SSE2-NEXT: movdqa %xmm5, %xmm2 +; SSE2-NEXT: pxor %xmm9, %xmm2 +; SSE2-NEXT: movdqa %xmm7, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm5 +; SSE2-NEXT: pxor %xmm8, %xmm6 +; SSE2-NEXT: por %xmm5, %xmm6 +; SSE2-NEXT: pslld $16, %xmm6 +; SSE2-NEXT: psrad $16, %xmm6 +; SSE2-NEXT: pxor %xmm4, %xmm9 +; SSE2-NEXT: pcmpgtd %xmm9, %xmm7 +; SSE2-NEXT: pxor %xmm7, %xmm8 +; SSE2-NEXT: pand %xmm4, %xmm7 +; SSE2-NEXT: por %xmm8, %xmm7 +; SSE2-NEXT: pslld $16, %xmm7 +; SSE2-NEXT: psrad $16, %xmm7 +; SSE2-NEXT: packssdw %xmm6, %xmm7 +; SSE2-NEXT: psubusw %xmm7, %xmm1 +; SSE2-NEXT: psubusw %xmm3, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm2, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] ; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: packssdw %xmm6, %xmm1 +; SSE2-NEXT: packssdw %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_16i32_max: ; SSSE3: # %bb.0: # %vector.ph -; SSSE3-NEXT: movdqa %xmm1, %xmm8 -; SSSE3-NEXT: pxor %xmm7, 
%xmm7 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm8 = xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7] -; SSSE3-NEXT: movdqa %xmm0, %xmm10 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1],xmm10[2],xmm7[2],xmm10[3],xmm7[3] -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7] -; SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648] -; SSSE3-NEXT: movdqa %xmm3, %xmm6 -; SSSE3-NEXT: pxor %xmm7, %xmm6 -; SSSE3-NEXT: movdqa %xmm0, %xmm9 -; SSSE3-NEXT: por %xmm7, %xmm9 -; SSSE3-NEXT: pcmpgtd %xmm6, %xmm9 -; SSSE3-NEXT: pand %xmm9, %xmm0 -; SSSE3-NEXT: pandn %xmm3, %xmm9 -; SSSE3-NEXT: por %xmm0, %xmm9 -; SSSE3-NEXT: movdqa %xmm2, %xmm6 -; SSSE3-NEXT: pxor %xmm7, %xmm6 -; SSSE3-NEXT: movdqa %xmm10, %xmm0 -; SSSE3-NEXT: por %xmm7, %xmm0 -; SSSE3-NEXT: pcmpgtd %xmm6, %xmm0 -; SSSE3-NEXT: pand %xmm0, %xmm10 -; SSSE3-NEXT: pandn %xmm2, %xmm0 -; SSSE3-NEXT: por %xmm10, %xmm0 -; SSSE3-NEXT: movdqa %xmm5, %xmm10 -; SSSE3-NEXT: pxor %xmm7, %xmm10 -; SSSE3-NEXT: movdqa %xmm8, %xmm6 -; SSSE3-NEXT: por %xmm7, %xmm6 -; SSSE3-NEXT: pcmpgtd %xmm10, %xmm6 -; SSSE3-NEXT: pand %xmm6, %xmm8 -; SSSE3-NEXT: pandn %xmm5, %xmm6 -; SSSE3-NEXT: por %xmm8, %xmm6 -; SSSE3-NEXT: movdqa %xmm4, %xmm8 -; SSSE3-NEXT: pxor %xmm7, %xmm8 -; SSSE3-NEXT: por %xmm1, %xmm7 -; SSSE3-NEXT: pcmpgtd %xmm8, %xmm7 -; SSSE3-NEXT: pand %xmm7, %xmm1 -; SSSE3-NEXT: pandn %xmm4, %xmm7 -; SSSE3-NEXT: por %xmm7, %xmm1 -; SSSE3-NEXT: psubd %xmm4, %xmm1 -; SSSE3-NEXT: psubd %xmm5, %xmm6 -; SSSE3-NEXT: psubd %xmm2, %xmm0 -; SSSE3-NEXT: psubd %xmm3, %xmm9 -; SSSE3-NEXT: pslld $16, %xmm9 -; SSSE3-NEXT: psrad $16, %xmm9 -; SSSE3-NEXT: pslld $16, %xmm0 -; SSSE3-NEXT: psrad $16, %xmm0 -; SSSE3-NEXT: packssdw %xmm9, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648] +; 
SSSE3-NEXT: movdqa %xmm3, %xmm8 +; SSSE3-NEXT: pxor %xmm9, %xmm8 +; SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147549183,2147549183,2147549183,2147549183] +; SSSE3-NEXT: movdqa %xmm7, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm8, %xmm6 +; SSSE3-NEXT: pcmpeqd %xmm8, %xmm8 +; SSSE3-NEXT: pand %xmm6, %xmm3 +; SSSE3-NEXT: pxor %xmm8, %xmm6 +; SSSE3-NEXT: por %xmm3, %xmm6 +; SSSE3-NEXT: pslld $16, %xmm6 +; SSSE3-NEXT: psrad $16, %xmm6 +; SSSE3-NEXT: movdqa %xmm2, %xmm10 +; SSSE3-NEXT: pxor %xmm9, %xmm10 +; SSSE3-NEXT: movdqa %xmm7, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm2 +; SSSE3-NEXT: pxor %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm2, %xmm3 +; SSSE3-NEXT: pslld $16, %xmm3 +; SSSE3-NEXT: psrad $16, %xmm3 +; SSSE3-NEXT: packssdw %xmm6, %xmm3 +; SSSE3-NEXT: movdqa %xmm5, %xmm2 +; SSSE3-NEXT: pxor %xmm9, %xmm2 +; SSSE3-NEXT: movdqa %xmm7, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm5 +; SSSE3-NEXT: pxor %xmm8, %xmm6 +; SSSE3-NEXT: por %xmm5, %xmm6 ; SSSE3-NEXT: pslld $16, %xmm6 ; SSSE3-NEXT: psrad $16, %xmm6 -; SSSE3-NEXT: pslld $16, %xmm1 +; SSSE3-NEXT: pxor %xmm4, %xmm9 +; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7 +; SSSE3-NEXT: pxor %xmm7, %xmm8 +; SSSE3-NEXT: pand %xmm4, %xmm7 +; SSSE3-NEXT: por %xmm8, %xmm7 +; SSSE3-NEXT: pslld $16, %xmm7 +; SSSE3-NEXT: psrad $16, %xmm7 +; SSSE3-NEXT: packssdw %xmm6, %xmm7 +; SSSE3-NEXT: psubusw %xmm7, %xmm1 +; SSSE3-NEXT: psubusw %xmm3, %xmm0 +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSSE3-NEXT: psrad $16, %xmm2 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSSE3-NEXT: psrad $16, %xmm0 +; SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSSE3-NEXT: psrad $16, %xmm2 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: psrad $16, %xmm1 -; SSSE3-NEXT: packssdw %xmm6, %xmm1 +; SSSE3-NEXT: packssdw %xmm2, 
%xmm1 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: psubus_16i32_max: ; SSE41: # %bb.0: # %vector.ph -; SSE41-NEXT: pxor %xmm8, %xmm8 -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm6 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm8[4],xmm1[5],xmm8[5],xmm1[6],xmm8[6],xmm1[7],xmm8[7] -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm7 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm8[4],xmm0[5],xmm8[5],xmm0[6],xmm8[6],xmm0[7],xmm8[7] -; SSE41-NEXT: pmaxud %xmm3, %xmm0 -; SSE41-NEXT: pmaxud %xmm2, %xmm7 -; SSE41-NEXT: pmaxud %xmm5, %xmm1 -; SSE41-NEXT: pmaxud %xmm4, %xmm6 -; SSE41-NEXT: psubd %xmm4, %xmm6 -; SSE41-NEXT: psubd %xmm5, %xmm1 -; SSE41-NEXT: psubd %xmm2, %xmm7 -; SSE41-NEXT: psubd %xmm3, %xmm0 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm8[1],xmm0[2],xmm8[3],xmm0[4],xmm8[5],xmm0[6],xmm8[7] -; SSE41-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0],xmm8[1],xmm7[2],xmm8[3],xmm7[4],xmm8[5],xmm7[6],xmm8[7] -; SSE41-NEXT: packusdw %xmm0, %xmm7 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm8[1],xmm1[2],xmm8[3],xmm1[4],xmm8[5],xmm1[6],xmm8[7] -; SSE41-NEXT: pblendw {{.*#+}} xmm6 = xmm6[0],xmm8[1],xmm6[2],xmm8[3],xmm6[4],xmm8[5],xmm6[6],xmm8[7] -; SSE41-NEXT: packusdw %xmm1, %xmm6 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535] +; SSE41-NEXT: pminud %xmm6, %xmm5 +; SSE41-NEXT: pminud %xmm6, %xmm4 +; SSE41-NEXT: packusdw %xmm5, %xmm4 +; SSE41-NEXT: pminud %xmm6, %xmm3 +; SSE41-NEXT: pminud %xmm6, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: psubusw %xmm2, %xmm0 +; SSE41-NEXT: psubusw %xmm4, %xmm1 ; SSE41-NEXT: retq ; ; AVX1-LABEL: psubus_16i32_max: @@ -1894,16 +1878,11 @@ define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind { ; AVX2-LABEL: psubus_16i32_max: ; AVX2: # %bb.0: # %vector.ph ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = 
[65535,65535,65535,65535,65535,65535,65535,65535] -; AVX2-NEXT: vpminud %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4 -; AVX2-NEXT: vpackusdw %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpminud %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 -; AVX2-NEXT: vpsubusw %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; AVX2-NEXT: vpminud %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpackusdw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] +; AVX2-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: psubus_16i32_max: From 2ed914cb7e9c0737bdf60a0b1fd48b6499973325 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 12 Jan 2021 14:07:53 +0000 Subject: [PATCH 22/86] [X86][SSE] getFauxShuffleMask - handle PACKSS(SRAI(),SRAI()) shuffle patterns. We can't easily treat ASHR as a faux shuffle, but if it was just feeding a PACKSS then it was likely being used as sign-extension for a truncation, so just peek through and adjust the mask accordingly. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 21 +++++ llvm/test/CodeGen/X86/psubus.ll | 114 +++++++++--------------- 2 files changed, 63 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 750c809eafca91..f28e28689806fa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7685,12 +7685,26 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, // If we know input saturation won't happen (or we don't care for particular // lanes), we can treat this as a truncation shuffle. 
+ bool Offset0 = false, Offset1 = false; if (Opcode == X86ISD::PACKSS) { if ((!(N0.isUndef() || EltsLHS.isNullValue()) && DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) || (!(N1.isUndef() || EltsRHS.isNullValue()) && DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt)) return false; + // We can't easily fold ASHR into a shuffle, but if it was feeding a + // PACKSS then it was likely being used for sign-extension for a + // truncation, so just peek through and adjust the mask accordingly. + if (N0.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N0.getNode()) && + N0.getConstantOperandAPInt(1) == NumBitsPerElt) { + Offset0 = true; + N0 = N0.getOperand(0); + } + if (N1.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N1.getNode()) && + N1.getConstantOperandAPInt(1) == NumBitsPerElt) { + Offset1 = true; + N1 = N1.getOperand(0); + } } else { APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt); if ((!(N0.isUndef() || EltsLHS.isNullValue()) && @@ -7707,6 +7721,13 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, Ops.push_back(N1); createPackShuffleMask(VT, Mask, IsUnary); + + if (Offset0 || Offset1) { + for (int &M : Mask) + if ((Offset0 && isInRange(M, 0, NumElts)) || + (Offset1 && isInRange(M, NumElts, 2 * NumElts))) + ++M; + } return true; } case X86ISD::VTRUNC: { diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll index 06240cd8bad382..351629a732c1f5 100644 --- a/llvm/test/CodeGen/X86/psubus.ll +++ b/llvm/test/CodeGen/X86/psubus.ll @@ -1403,11 +1403,6 @@ define <8 x i16> @psubus_8i32_max(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSE2-NEXT: psrad $16, %xmm5 ; SSE2-NEXT: packssdw %xmm6, %xmm5 ; SSE2-NEXT: psubusw %xmm5, %xmm0 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw 
%xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_8i32_max: @@ -1738,111 +1733,91 @@ define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind { ; SSE2-LABEL: psubus_16i32_max: ; SSE2: # %bb.0: # %vector.ph ; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm3, %xmm8 +; SSE2-NEXT: movdqa %xmm5, %xmm8 ; SSE2-NEXT: pxor %xmm9, %xmm8 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147549183,2147549183,2147549183,2147549183] ; SSE2-NEXT: movdqa %xmm7, %xmm6 ; SSE2-NEXT: pcmpgtd %xmm8, %xmm6 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm8 -; SSE2-NEXT: pand %xmm6, %xmm3 +; SSE2-NEXT: pand %xmm6, %xmm5 ; SSE2-NEXT: pxor %xmm8, %xmm6 -; SSE2-NEXT: por %xmm3, %xmm6 +; SSE2-NEXT: por %xmm5, %xmm6 ; SSE2-NEXT: pslld $16, %xmm6 ; SSE2-NEXT: psrad $16, %xmm6 -; SSE2-NEXT: movdqa %xmm2, %xmm10 +; SSE2-NEXT: movdqa %xmm4, %xmm10 ; SSE2-NEXT: pxor %xmm9, %xmm10 -; SSE2-NEXT: movdqa %xmm7, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm10, %xmm3 -; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: pxor %xmm8, %xmm3 -; SSE2-NEXT: por %xmm2, %xmm3 -; SSE2-NEXT: pslld $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: packssdw %xmm6, %xmm3 -; SSE2-NEXT: movdqa %xmm5, %xmm2 -; SSE2-NEXT: pxor %xmm9, %xmm2 +; SSE2-NEXT: movdqa %xmm7, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm10, %xmm5 +; SSE2-NEXT: pand %xmm5, %xmm4 +; SSE2-NEXT: pxor %xmm8, %xmm5 +; SSE2-NEXT: por %xmm4, %xmm5 +; SSE2-NEXT: pslld $16, %xmm5 +; SSE2-NEXT: psrad $16, %xmm5 +; SSE2-NEXT: packssdw %xmm6, %xmm5 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pxor %xmm9, %xmm4 ; SSE2-NEXT: movdqa %xmm7, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm6 -; SSE2-NEXT: pand %xmm6, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm3 ; SSE2-NEXT: pxor %xmm8, %xmm6 -; SSE2-NEXT: por %xmm5, %xmm6 +; SSE2-NEXT: por %xmm3, %xmm6 ; SSE2-NEXT: pslld $16, %xmm6 ; SSE2-NEXT: psrad $16, %xmm6 -; SSE2-NEXT: pxor %xmm4, %xmm9 +; SSE2-NEXT: pxor %xmm2, %xmm9 ; SSE2-NEXT: pcmpgtd %xmm9, %xmm7 ; 
SSE2-NEXT: pxor %xmm7, %xmm8 -; SSE2-NEXT: pand %xmm4, %xmm7 +; SSE2-NEXT: pand %xmm2, %xmm7 ; SSE2-NEXT: por %xmm8, %xmm7 ; SSE2-NEXT: pslld $16, %xmm7 ; SSE2-NEXT: psrad $16, %xmm7 ; SSE2-NEXT: packssdw %xmm6, %xmm7 -; SSE2-NEXT: psubusw %xmm7, %xmm1 -; SSE2-NEXT: psubusw %xmm3, %xmm0 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: psrad $16, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm2, %xmm0 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: psrad $16, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: packssdw %xmm2, %xmm1 +; SSE2-NEXT: psubusw %xmm7, %xmm0 +; SSE2-NEXT: psubusw %xmm5, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_16i32_max: ; SSSE3: # %bb.0: # %vector.ph ; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648] -; SSSE3-NEXT: movdqa %xmm3, %xmm8 +; SSSE3-NEXT: movdqa %xmm5, %xmm8 ; SSSE3-NEXT: pxor %xmm9, %xmm8 ; SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147549183,2147549183,2147549183,2147549183] ; SSSE3-NEXT: movdqa %xmm7, %xmm6 ; SSSE3-NEXT: pcmpgtd %xmm8, %xmm6 ; SSSE3-NEXT: pcmpeqd %xmm8, %xmm8 -; SSSE3-NEXT: pand %xmm6, %xmm3 +; SSSE3-NEXT: pand %xmm6, %xmm5 ; SSSE3-NEXT: pxor %xmm8, %xmm6 -; SSSE3-NEXT: por %xmm3, %xmm6 +; SSSE3-NEXT: por %xmm5, %xmm6 ; SSSE3-NEXT: pslld $16, %xmm6 ; SSSE3-NEXT: psrad $16, %xmm6 -; SSSE3-NEXT: movdqa %xmm2, %xmm10 +; SSSE3-NEXT: movdqa %xmm4, %xmm10 ; SSSE3-NEXT: pxor %xmm9, %xmm10 -; SSSE3-NEXT: movdqa %xmm7, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3 -; SSSE3-NEXT: pand %xmm3, %xmm2 -; SSSE3-NEXT: pxor %xmm8, %xmm3 -; SSSE3-NEXT: por %xmm2, %xmm3 -; SSSE3-NEXT: pslld $16, %xmm3 -; SSSE3-NEXT: psrad $16, %xmm3 -; SSSE3-NEXT: packssdw %xmm6, %xmm3 -; SSSE3-NEXT: movdqa %xmm5, %xmm2 -; SSSE3-NEXT: pxor 
%xmm9, %xmm2 +; SSSE3-NEXT: movdqa %xmm7, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm10, %xmm5 +; SSSE3-NEXT: pand %xmm5, %xmm4 +; SSSE3-NEXT: pxor %xmm8, %xmm5 +; SSSE3-NEXT: por %xmm4, %xmm5 +; SSSE3-NEXT: pslld $16, %xmm5 +; SSSE3-NEXT: psrad $16, %xmm5 +; SSSE3-NEXT: packssdw %xmm6, %xmm5 +; SSSE3-NEXT: movdqa %xmm3, %xmm4 +; SSSE3-NEXT: pxor %xmm9, %xmm4 ; SSSE3-NEXT: movdqa %xmm7, %xmm6 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm6 -; SSSE3-NEXT: pand %xmm6, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm3 ; SSSE3-NEXT: pxor %xmm8, %xmm6 -; SSSE3-NEXT: por %xmm5, %xmm6 +; SSSE3-NEXT: por %xmm3, %xmm6 ; SSSE3-NEXT: pslld $16, %xmm6 ; SSSE3-NEXT: psrad $16, %xmm6 -; SSSE3-NEXT: pxor %xmm4, %xmm9 +; SSSE3-NEXT: pxor %xmm2, %xmm9 ; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7 ; SSSE3-NEXT: pxor %xmm7, %xmm8 -; SSSE3-NEXT: pand %xmm4, %xmm7 +; SSSE3-NEXT: pand %xmm2, %xmm7 ; SSSE3-NEXT: por %xmm8, %xmm7 ; SSSE3-NEXT: pslld $16, %xmm7 ; SSSE3-NEXT: psrad $16, %xmm7 ; SSSE3-NEXT: packssdw %xmm6, %xmm7 -; SSSE3-NEXT: psubusw %xmm7, %xmm1 -; SSSE3-NEXT: psubusw %xmm3, %xmm0 -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSSE3-NEXT: psrad $16, %xmm2 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSSE3-NEXT: psrad $16, %xmm0 -; SSSE3-NEXT: packssdw %xmm2, %xmm0 -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSSE3-NEXT: psrad $16, %xmm2 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] -; SSSE3-NEXT: psrad $16, %xmm1 -; SSSE3-NEXT: packssdw %xmm2, %xmm1 +; SSSE3-NEXT: psubusw %xmm7, %xmm0 +; SSSE3-NEXT: psubusw %xmm5, %xmm1 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: psubus_16i32_max: @@ -1923,11 +1898,6 @@ define <8 x i16> @psubus_i16_i32_max_swapped(<8 x i16> %x, <8 x i32> %y) nounwin ; SSE2-NEXT: psrad $16, %xmm5 ; SSE2-NEXT: packssdw %xmm6, %xmm5 ; SSE2-NEXT: psubusw %xmm5, %xmm0 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = 
xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_i16_i32_max_swapped: From 80f078548868d0dd3d74ab8a1deb8aa46870cdf3 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 12 Jan 2021 14:01:59 +0000 Subject: [PATCH 23/86] [mlir][Linalg] NFC - Refactor fusion APIs This revision uniformizes fusion APIs to allow passing OpOperand, OpResult and adds a finer level of control over fusion. Differential Revision: https://reviews.llvm.org/D94493 --- .../Linalg/IR/LinalgStructuredOpsInterface.td | 12 ++ .../Dialect/Linalg/Transforms/Transforms.h | 14 +- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 31 ++-- mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 135 +++++++++--------- .../Linalg/Transforms/FusionOnTensors.cpp | 24 ++-- .../Dialect/Linalg/Transforms/Transforms.cpp | 5 +- .../Transforms/TestLinalgFusionTransforms.cpp | 14 +- 7 files changed, 128 insertions(+), 107 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 3fc3fa4a5556e5..f3b7181d71a59a 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -726,6 +726,18 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { getNumShapedOperands()); }] >, + InterfaceMethod< + /*desc=*/[{ + Return the OpOperands for all the shaped operands. + }], + /*retTy=*/" OpOperand&", + /*methodName=*/"getShapedOpOperand", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return *(this->getShapedOpOperands().begin() + i); + }] + >, InterfaceMethod< /*desc=*/[{ Return the range over input and output operands. 
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index d816414ef8b4a4..de1658f96a8713 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -35,6 +35,7 @@ struct TiledLinalgOp { LinalgOp op; SmallVector loops; SmallVector tensorResults; + TiledLinalgOp &operator=(const TiledLinalgOp &) = default; }; /// Populates patterns for vectorization of all ConvN-D ops. @@ -412,9 +413,8 @@ struct LinalgBaseTilingPattern : public RewritePattern { LinalgTilingOptions options, LinalgMarker marker = LinalgMarker(), PatternBenefit benefit = 1); - LogicalResult - matchAndRewriteBase(Operation *op, PatternRewriter &rewriter, - SmallVectorImpl &tensorResults) const; + LogicalResult matchAndRewriteBase(Operation *op, PatternRewriter &rewriter, + TiledLinalgOp &result) const; private: /// LinalgTransformMarker handles special attribute manipulations. 
@@ -432,14 +432,14 @@ struct LinalgTilingPattern : public LinalgBaseTilingPattern { marker, benefit) {} LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { - SmallVector tensorResults; + TiledLinalgOp tiledLinalgOp; if (failed(LinalgBaseTilingPattern::matchAndRewriteBase(op, rewriter, - tensorResults))) + tiledLinalgOp))) return failure(); - if (tensorResults.empty()) + if (tiledLinalgOp.tensorResults.empty()) rewriter.eraseOp(op); else - rewriter.replaceOp(op, tensorResults); + rewriter.replaceOp(op, tiledLinalgOp.tensorResults); return success(); } }; diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 2ef32cfe378b30..f194209f19103b 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -92,26 +92,31 @@ findAllFusableDependences(ArrayRef ops, /// Fuses producer into consumer if the producer is structurally feasible and /// the fusion would not violate dependencies. -/// Implements the fusion part of the "tileAndFuse on buffers" -/// transformation and thus requires the `consumerdIdx`^th operand of `consumer` -/// to be a `subview` op (generally obtained by applying the tiling -/// transformation). -Optional fuseProducerOfBuffer(OpBuilder &b, LinalgOp consumer, - unsigned consumerIdx, +/// Implements the fusion part of the "tileAndFuse on buffers" transformation +/// and thus requires the `consumerOpOperand` to be a `subview` op (generally +/// obtained by applying the tiling transformation). +Optional fuseProducerOfBuffer(OpBuilder &b, + OpOperand &consumerOpOperand, const LinalgDependenceGraph &graph); /// Tensor counterpart of `fuseProducerOfBuffer`. /// This implements the fusion part of the "tileAndFuse on tensors" -/// transformation and thus requires the `consumerdIdx`^th operand of `consumer` -/// to be the result of a `subtensor` op (generally obtained by applying the -/// tiling transformation). 
-Optional fuseProducerOfTensor(OpBuilder &b, LinalgOp consumer, - unsigned consumerIdx); +/// transformation and thus requires the `consumerOpOperand` to be a `subtensor` +/// op (generally obtained by applying the tiling transformation). +Optional fuseProducerOfTensor(OpBuilder &b, + OpOperand &consumerOpOperand); +/// Tensor counterpart of `fuseProducerOfBuffer`. +/// This implements the fusion part of the "tileAndFuse on tensors" +/// transformation and thus requires the `consumerOpOperand` to be a `subtensor` +/// op (generally obtained by applying the tiling transformation). +/// Assumes `producerOfTensor` is a Linalg op that produces `consumerOpOperand`. +Optional fuseProducerOfTensor(OpBuilder &b, + OpResult producerOpResult, + OpOperand &consumerOpOperand); /// Fuse linalg operation on tensors, with the producer of the operand at /// position `consumerIdx` of the consumer. Optional> fuseTensorOps(PatternRewriter &rewriter, - Operation *consumer, - unsigned consumerIdx); + OpOperand &consumerOpOperand); /// Like `getShape`, but only returns statically-known information, without /// generating any new IR. For each shape dimension, returns >=0 if that diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index f9908af29313a3..8f02f3d83cf189 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -258,20 +258,19 @@ static Range getRangeFromOperandShape(OpBuilder &b, Location loc, /// `producer.getOutputBuffers()`. /// 2. Tensor case: `producerIdx` is the index of the tensor in /// `producer.getResults()`. 
-static LinalgOp fuse(OpBuilder &b, LinalgOp producer, unsigned producerIdx, - LinalgOp consumer, unsigned consumerIdx) { - AffineMap producerMap = producer.getOutputIndexingMap(producerIdx); - LLVM_DEBUG(llvm::dbgs() << "Producer Idx: " << producerIdx +static LinalgOp fuse(OpBuilder &b, LinalgOp producerOp, + unsigned producerOutNumber, OpOperand &consumerOpOperand) { + AffineMap producerMap = producerOp.getOutputIndexingMap(producerOutNumber); + LLVM_DEBUG(llvm::dbgs() << "Producer Idx: " << producerOutNumber << ", producer map: " << producerMap << "\n"); DenseMap fusedLoopsAndRanges; - Location loc = consumer.getLoc(); - Value shapedOperand = consumer.getShapedOperand(consumerIdx); + Value shapedOperand = consumerOpOperand.get(); for (auto en : llvm::enumerate(producerMap.getResults())) { unsigned posInProducerLoop = en.value().cast().getPosition(); - fusedLoopsAndRanges[posInProducerLoop] = - getRangeFromOperandShape(b, loc, shapedOperand, en.index()); + fusedLoopsAndRanges[posInProducerLoop] = getRangeFromOperandShape( + b, consumerOpOperand.getOwner()->getLoc(), shapedOperand, en.index()); } - return fuse(b, producer, fusedLoopsAndRanges); + return fuse(b, producerOp, fusedLoopsAndRanges); } // Encode structural fusion safety preconditions. @@ -378,9 +377,10 @@ static bool isSameSubView(Value a, Value b) { } static Optional -findFusableProducer(LinalgOp consumer, unsigned consumerIdx, +findFusableProducer(OpOperand &consumerOpOperand, const LinalgDependenceGraph &dependenceGraph) { - assert(consumer.hasBufferSemantics() && "revisit usage of shaped operand"); + LinalgOp consumerOp = cast(consumerOpOperand.getOwner()); + assert(consumerOp.hasBufferSemantics() && "revisit usage of shaped operand"); // Only consider RAW and WAW atm. 
for (auto depType : { @@ -388,21 +388,16 @@ findFusableProducer(LinalgOp consumer, unsigned consumerIdx, LinalgDependenceGraph::DependenceType::WAW, }) { for (auto dependence : llvm::make_filter_range( - dependenceGraph.getDependencesInto(consumer, depType), - [consumerIdx]( - LinalgDependenceGraph::LinalgDependenceGraphElem elem) { - return elem.indexingOpView->getOperandNumber() == consumerIdx; + dependenceGraph.getDependencesInto(consumerOp, depType), + [&](LinalgDependenceGraph::LinalgDependenceGraphElem elem) { + return elem.indexingOpView->get() == consumerOpOperand.get() && + elem.indexingOpView->getOperandNumber() == + consumerOpOperand.getOperandNumber(); })) { - // Check that the dependence is indeed on the input `consumerIdx` view. - Value consumedView = dependence.indexingOpView->get(); - if (!isSameSubView(consumer.getShapedOperand(consumerIdx), consumedView)) - continue; - // Consumer consumes this view, `isStructurallyFusableProducer` also // checks whether it is a strict subview of the producer view. auto producer = cast(dependence.dependentOpView->getOwner()); - Value producedView = dependence.dependentOpView->get(); LLVM_DEBUG(llvm::dbgs() << "\n" << LinalgDependenceGraph::getDependenceTypeStr(depType) @@ -412,10 +407,10 @@ findFusableProducer(LinalgOp consumer, unsigned consumerIdx, << dependence.dependentOpView->getOperandNumber() - producer.getNumInputs() << "\n"); - (void)producedView; // Simple fusability checks. 
- if (!isFusableInto(dependenceGraph, consumer, consumedView, producer)) + if (!isFusableInto(dependenceGraph, consumerOp, consumerOpOperand.get(), + producer)) continue; return dependence; @@ -425,29 +420,28 @@ findFusableProducer(LinalgOp consumer, unsigned consumerIdx, } Optional -mlir::linalg::fuseProducerOfBuffer(OpBuilder &b, LinalgOp consumer, - unsigned consumerIdx, +mlir::linalg::fuseProducerOfBuffer(OpBuilder &b, OpOperand &consumerOpOperand, const LinalgDependenceGraph &graph) { Optional fusableDependence = - findFusableProducer(consumer, consumerIdx, graph); + findFusableProducer(consumerOpOperand, graph); if (!fusableDependence) return {}; LinalgOp producerOp = cast(fusableDependence->dependentOpView->getOwner()); // If producer is already in the same block as consumer, we are done. - if (consumer->getBlock() == producerOp->getBlock()) + if (consumerOpOperand.get().getParentBlock() == + fusableDependence->dependentOpView->get().getParentBlock()) return {}; unsigned producerIdx = fusableDependence->dependentOpView->getOperandNumber() - producerOp.getNumInputs(); - Value consumerView = consumer.getShapedOperand(consumerIdx); // Must be a subview or a slice to guarantee there are loops we can fuse // into. - auto subView = consumerView.getDefiningOp(); - auto slice = consumerView.getDefiningOp(); + auto subView = consumerOpOperand.get().getDefiningOp(); + auto slice = consumerOpOperand.get().getDefiningOp(); if (!subView && !slice) { LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subview or slice)"); return {}; @@ -455,25 +449,25 @@ mlir::linalg::fuseProducerOfBuffer(OpBuilder &b, LinalgOp consumer, // Fuse `producer` just before `consumer`. 
OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(consumer.getOperation()); - ScopedContext scope(b, consumer.getLoc()); - LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " << *consumer << "\n"); + b.setInsertionPoint(consumerOpOperand.getOwner()); + ScopedContext scope(b, consumerOpOperand.getOwner()->getLoc()); + LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " + << *consumerOpOperand.getOwner() << "\n"); - auto fusedProducer = fuse(b, producerOp, producerIdx, consumer, consumerIdx); + auto fusedProducer = fuse(b, producerOp, producerIdx, consumerOpOperand); return FusionInfo{producerOp, fusedProducer}; } /// Walk back use-def chain through scf::For yields. /// Sets `producer` and `outputIndex` if it finds a producer LinalgOp -static void getProducerOfTensor(Value tensor, LinalgOp &producer, - unsigned &outputIndex) { +static void getProducerOfTensor(Value tensor, OpResult &opResult) { if (!tensor.getType().isa()) return; while (true) { + LLVM_DEBUG(llvm::dbgs() << "\ngetProducerOfTensor: " << tensor); if (auto linalgOp = tensor.getDefiningOp()) { - producer = linalgOp; - outputIndex = tensor.cast().getResultNumber(); + opResult = tensor.cast(); return; } if (auto subTensorOp = tensor.getDefiningOp()) { @@ -482,7 +476,7 @@ static void getProducerOfTensor(Value tensor, LinalgOp &producer, } if (auto blockArg = tensor.dyn_cast()) { if (auto forOp = blockArg.getDefiningOp()) { - tensor = forOp.getResult(blockArg.getArgNumber()); + tensor = *(forOp.getIterOperands().begin() + blockArg.getArgNumber()); continue; } } @@ -490,45 +484,58 @@ static void getProducerOfTensor(Value tensor, LinalgOp &producer, } } -Optional mlir::linalg::fuseProducerOfTensor(OpBuilder &b, - LinalgOp consumer, - unsigned consumerIdx) { - Value inputTensor = consumer.getInput(consumerIdx); - LinalgOp producerOp; - unsigned producerIdx; - getProducerOfTensor(inputTensor, producerOp, producerIdx); +Optional +mlir::linalg::fuseProducerOfTensor(OpBuilder &b, OpOperand &consumerOpOperand) { + 
Value inputTensor = consumerOpOperand.get(); + OpResult producerOpResult; + getProducerOfTensor(inputTensor, producerOpResult); + if (!producerOpResult) { + LLVM_DEBUG(llvm::dbgs() << "\nUnable to find producer"); + return {}; + } + return fuseProducerOfTensor(b, producerOpResult, consumerOpOperand); +} + +Optional +mlir::linalg::fuseProducerOfTensor(OpBuilder &b, OpResult producerOpResult, + OpOperand &consumerOpOperand) { + auto producerOp = dyn_cast(producerOpResult.getOwner()); + assert(producerOp && "expected Linalg producer"); + LinalgOp consumerOp = cast(consumerOpOperand.getOwner()); + Value inputTensor = consumerOpOperand.get(); // Must be a subtensor to guarantee there are loops we can fuse into. auto subTensor = inputTensor.getDefiningOp(); - if (!subTensor || !producerOp) { - LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subtensor)"); + if (!subTensor) { + LLVM_DEBUG(llvm::dbgs() + << "\nNot fusable, not a subtensor: " << inputTensor); return {}; } // If producer is already in the same block as consumer, we are done. - if (consumer->getBlock() == producerOp->getBlock()) + if (consumerOpOperand.get().getParentBlock() == + producerOpResult.getParentBlock()) return {}; // Insert fused `producer` just before `consumer`. OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(consumer.getOperation()); - ScopedContext scope(b, consumer.getLoc()); - LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " << *consumer << "\n"); - LinalgOp fusedProducer = - fuse(b, producerOp, producerIdx, consumer, consumerIdx); + b.setInsertionPoint(consumerOp); + ScopedContext scope(b, consumerOp->getLoc()); + LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " << *consumerOp << "\n"); + LinalgOp fusedProducer = fuse( + b, producerOp, producerOpResult.getResultNumber(), consumerOpOperand); // Replace use. // Canonicalizations are not guaranteed to have happened before constructing // `fusedProducer`. In the tensor case this can result in temporary type // mismatches. 
Insert a `tensor.cast` op to propagate the transformation // invariant that types are compatible. - Value def = fusedProducer->getResult(producerIdx); - OpOperand &use = consumer->getOpOperand(consumerIdx); - Type consumerType = use.get().getType(); + Value def = fusedProducer->getResult(producerOpResult.getResultNumber()); + Type consumerType = consumerOpOperand.get().getType(); if (consumerType != def.getType()) def = b.create(fusedProducer.getLoc(), consumerType, def); - use.set(def); - return FusionInfo{producerOp, fusedProducer}; + consumerOpOperand.set(def); + return FusionInfo{cast(producerOpResult.getOwner()), fusedProducer}; } /// Prune all dimensions that are of reduction iterator type from `map`. @@ -734,11 +741,9 @@ FusableOpDependencesTy mlir::linalg::findAllFusableDependences( // in the meanwhile disallow such a fusion. DenseMap fusedProducerIndexingMap; for (LinalgOp op : reverse(ops)) { - for (auto operandIndex : - llvm::seq(0, op.getNumShapedOperands())) { + for (OpOperand &opOperand : op.getShapedOpOperands()) { Optional - fusableDependence = - findFusableProducer(op, operandIndex, dependenceGraph); + fusableDependence = findFusableProducer(opOperand, dependenceGraph); if (!fusableDependence) continue; LinalgOp producerOp = @@ -759,7 +764,7 @@ FusableOpDependencesTy mlir::linalg::findAllFusableDependences( op.emitRemark( "unhandled non permutation indexing map for fused view in " "producer for operand at index ") - << operandIndex; + << opOperand.getOperandNumber(); return FusableOpDependencesTy{}; } @@ -770,7 +775,7 @@ FusableOpDependencesTy mlir::linalg::findAllFusableDependences( op.emitRemark( "unhandled case where indexing map for fused view in the consumer " "is not a projected permutation while fusing at index ") - << operandIndex; + << opOperand.getOperandNumber(); return FusableOpDependencesTy{}; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp index 
833662d282b6d7..670d456ad2f248 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -178,8 +178,10 @@ static void generateFusedTensorOpRegion(PatternRewriter &rewriter, } static Optional> -fuseTensorOpsImpl(LinalgOp producer, LinalgOp consumer, unsigned consumerIdx, +fuseTensorOpsImpl(LinalgOp producer, OpOperand &consumerOpOperand, PatternRewriter &rewriter) { + LinalgOp consumer = cast(consumerOpOperand.getOwner()); + unsigned consumerIdx = consumerOpOperand.getOperandNumber(); if (!areTensorOpsFusable(producer, consumer, consumerIdx)) return llvm::None; @@ -1027,21 +1029,19 @@ struct FoldSplatConstants : public OpRewritePattern { } // namespace Optional> -mlir::linalg::fuseTensorOps(PatternRewriter &rewriter, Operation *consumer, - unsigned consumerIdx) { - if (consumerIdx >= consumer->getNumOperands()) - return llvm::None; - Operation *producer = consumer->getOperand(consumerIdx).getDefiningOp(); +mlir::linalg::fuseTensorOps(PatternRewriter &rewriter, + OpOperand &consumerOpOperand) { + Operation *producer = consumerOpOperand.get().getDefiningOp(); if (!producer || producer->getNumResults() != 1) return llvm::None; // Fuse when consumer is GenericOp or IndexedGenericOp. - if (!isa(consumer) || + if (!isa(consumerOpOperand.getOwner()) || !isa(producer)) return llvm::None; - return fuseTensorOpsImpl(cast(producer), cast(consumer), - consumerIdx, rewriter); + return fuseTensorOpsImpl(cast(producer), consumerOpOperand, + rewriter); } namespace { @@ -1053,12 +1053,12 @@ struct FuseTensorOps : public OpRewritePattern { LogicalResult matchAndRewrite(LinalgOpTy op, PatternRewriter &rewriter) const override { // Find the first operand that is defined by another generic op on tensors. 
- for (auto operandNum : llvm::seq(0, op->getNumOperands())) { - Operation *producer = op->getOperand(operandNum).getDefiningOp(); + for (OpOperand &opOperand : op.getShapedOpOperands()) { + Operation *producer = opOperand.get().getDefiningOp(); if (!producer) continue; Optional> fusedOpResults = - fuseTensorOps(rewriter, op, operandNum); + fuseTensorOps(rewriter, opOperand); if (fusedOpResults) { rewriter.replaceOp(op, *fusedOpResults); if (producer->use_empty()) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index c5d811c41edb0b..5b6302a7e5a240 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -117,8 +117,7 @@ mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( options(options) {} LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewriteBase( - Operation *op, PatternRewriter &rewriter, - SmallVectorImpl &tensorResults) const { + Operation *op, PatternRewriter &rewriter, TiledLinalgOp &result) const { LinalgOp linalgOp = dyn_cast(op); if (!linalgOp) return failure(); @@ -131,7 +130,7 @@ LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewriteBase( return failure(); // Return relevant information to derived pattern. - tensorResults = res->tensorResults; + result = *res; // New marker if specified. marker.replaceLinalgMarker(rewriter, res->op.getOperation()); diff --git a/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp index 046fad43c3bf96..5d55f0375f371a 100644 --- a/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp @@ -135,14 +135,14 @@ static LogicalResult fuseLinalgOpsGreedily(FuncOp f) { // Tile and Fuse for tensors inputs (TODO: all tensor operands). 
bool changed = false; for (LinalgOp linalgOp : llvm::reverse(linalgOps)) { - for (auto en : llvm::enumerate(linalgOp.getShapedOperands())) { - if (en.value().getType().isa()) { + for (OpOperand &opOperand : linalgOp.getShapedOpOperands()) { + if (opOperand.get().getType().isa()) { // TODO: LinalgDependenceGraph should be able to update itself. // The current naive and expensive reconstruction of the graph should be // removed. linalg::Aliases aliases; linalg::LinalgDependenceGraph graph(aliases, linalgOps); - if (auto info = fuseProducerOfBuffer(b, linalgOp, en.index(), graph)) { + if (auto info = fuseProducerOfBuffer(b, opOperand, graph)) { auto *originalOp = info->originalProducer.getOperation(); eraseSet.insert(originalOp); auto *originalOpInLinalgOpsVector = @@ -151,11 +151,11 @@ static LogicalResult fuseLinalgOpsGreedily(FuncOp f) { changed = true; } } else { - assert(en.value().getType().isa()); - // Tile and Fuse tensor input (TODO: init_tensors too). - if (en.index() >= linalgOp.getNumInputs()) + assert(opOperand.get().getType().isa()); + // Tile and Fuse tensor input. + if (opOperand.getOperandNumber() >= linalgOp.getNumInputs()) continue; - if (auto info = fuseProducerOfTensor(b, linalgOp, en.index())) { + if (auto info = fuseProducerOfTensor(b, opOperand)) { auto *originalOp = info->originalProducer.getOperation(); auto *originalOpInLinalgOpsVector = std::find(linalgOps.begin(), linalgOps.end(), originalOp); From a6759477129c98820a56231d2f3fd27e5fe31ab3 Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Mon, 11 Jan 2021 09:46:27 -0500 Subject: [PATCH 24/86] [TableGen] Improve error message for semicolon after braced body. Add a test for this message. 
Differential Revision: https://reviews.llvm.org/D94412 --- llvm/lib/TableGen/TGParser.cpp | 19 ++++++++++++-- llvm/test/TableGen/spurious-semi.td | 39 +++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 llvm/test/TableGen/spurious-semi.td diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 7918e2ac98f684..24949f0b2b4d5b 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -2836,7 +2836,7 @@ bool TGParser::ParseBody(Record *CurRec) { return false; if (!consume(tgtok::l_brace)) - return TokError("Expected ';' or '{' to start body"); + return TokError("Expected '{' to start body or ';' for declaration only"); // An object body introduces a new scope for local variables. TGLocalVarScope *BodyScope = PushLocalScope(); @@ -2849,6 +2849,14 @@ bool TGParser::ParseBody(Record *CurRec) { // Eat the '}'. Lex.Lex(); + + // If we have a semicolon, print a gentle error. + SMLoc SemiLoc = Lex.getLoc(); + if (consume(tgtok::semi)) { + PrintError(SemiLoc, "A class or def body should not end with a semicolon"); + PrintNote("Semicolon ignored; remove to eliminate this error"); + } + return false; } @@ -3432,6 +3440,13 @@ bool TGParser::ParseMultiClass() { } Lex.Lex(); // eat the '}'. + // If we have a semicolon, print a gentle error. + SMLoc SemiLoc = Lex.getLoc(); + if (consume(tgtok::semi)) { + PrintError(SemiLoc, "A multiclass body should not end with a semicolon"); + PrintNote("Semicolon ignored; remove to eliminate this error"); + } + PopLocalScope(MulticlassScope); } @@ -3623,7 +3638,7 @@ bool TGParser::ParseFile() { if (Lex.getCode() == tgtok::Eof) return false; - return TokError("Unexpected input at top level"); + return TokError("Unexpected token at top level"); } // Check an assertion: Obtain the condition value and be sure it is true. 
diff --git a/llvm/test/TableGen/spurious-semi.td b/llvm/test/TableGen/spurious-semi.td new file mode 100644 index 00000000000000..9e3b7ada02de87 --- /dev/null +++ b/llvm/test/TableGen/spurious-semi.td @@ -0,0 +1,39 @@ +// RUN: llvm-tblgen %s | FileCheck %s +// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s + +// This file tests the error message that is printed when a body is +// terminated with a semicolon in addition to the close brace. + +// CHECK: class Class0 +// CHECK: def Rec0 + +class Class0 { +} + +def Rec0 { +} + +multiclass MC0 { + def R; +} + +#ifdef ERROR1 + +// ERROR1: error: A class or def body should not end with a semicolon +// ERROR1: Semicolon ignored +// ERROR1: error: A class or def body should not end with a semicolon +// ERROR1: Semicolon ignored +// ERROR1: error: A multiclass body should not end with a semicolon +// ERROR1: Semicolon ignored + +class Class1 { +}; + +def Rec1 { +}; + +multiclass MC1 { + def R; +}; + +#endif From 0bd9a1369112f7d0a8a3b94a050fd5ed37037e9b Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 12 Jan 2021 09:42:25 -0500 Subject: [PATCH 25/86] [mlir][openacc] Use TableGen information for default enum Use TableGen and information in ACC.td for the Default enum in the OpenACC dialect. This patch generalize what was done for OpenMP for directives. 
Follow up patch after D93576 Reviewed By: kiranchandramohan Differential Revision: https://reviews.llvm.org/D93710 --- llvm/include/llvm/Frontend/OpenACC/ACC.td | 4 +-- .../mlir/Dialect/OpenACC/CMakeLists.txt | 6 +++- .../mlir/Dialect/OpenACC/OpenACCOps.td | 14 ++------ .../{openmp-common.td => directive-common.td} | 7 +++- mlir/tools/mlir-tblgen/CMakeLists.txt | 2 +- ...MPCommonGen.cpp => DirectiveCommonGen.cpp} | 32 +++++++++++-------- 6 files changed, 36 insertions(+), 29 deletions(-) rename mlir/test/mlir-tblgen/{openmp-common.td => directive-common.td} (83%) rename mlir/tools/mlir-tblgen/{OpenMPCommonGen.cpp => DirectiveCommonGen.cpp} (72%) diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td index d53d3132c96949..58bb73fb64029f 100644 --- a/llvm/include/llvm/Frontend/OpenACC/ACC.td +++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -80,8 +80,8 @@ def ACCC_Create : Clause<"create"> { } // 2.5.15 -def ACC_Default_none : ClauseVal<"none", 1, 0> { let isDefault = 1; } -def ACC_Default_present : ClauseVal<"present", 0, 0> {} +def ACC_Default_none : ClauseVal<"none", 1, 1> { let isDefault = 1; } +def ACC_Default_present : ClauseVal<"present", 0, 1> {} def ACCC_Default : Clause<"default"> { let flangClassValue = "AccDefaultClause"; diff --git a/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt b/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt index 32b92096c71b60..32b0c7f902aedd 100644 --- a/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt @@ -1,3 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend/OpenACC/ACC.td) +mlir_tablegen(AccCommon.td --gen-directive-decl) +add_public_tablegen_target(acc_common_td) + set(LLVM_TARGET_DEFINITIONS OpenACCOps.td) mlir_tablegen(OpenACCOpsDialect.h.inc -gen-dialect-decls -dialect=acc) mlir_tablegen(OpenACCOps.h.inc -gen-op-decls) @@ -6,4 +10,4 @@ mlir_tablegen(OpenACCOpsEnums.h.inc -gen-enum-decls) 
mlir_tablegen(OpenACCOpsEnums.cpp.inc -gen-enum-defs) add_mlir_doc(OpenACCOps -gen-dialect-doc OpenACCDialect Dialects/) add_public_tablegen_target(MLIROpenACCOpsIncGen) - +add_dependencies(OpenACCDialectDocGen acc_common_td) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 6feec888ec8e27..a48af00535a771 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -14,6 +14,7 @@ #define OPENACC_OPS include "mlir/IR/OpBase.td" +include "mlir/Dialect/OpenACC/AccCommon.td" def OpenACC_Dialect : Dialect { let name = "acc"; @@ -63,15 +64,6 @@ def OpenACC_ReductionOpAttr : StrEnumAttr<"ReductionOpAttr", // Type used in operation below. def IntOrIndex : AnyTypeOf<[AnyInteger, Index]>; -// Parallel and data op default enumeration -def OpenACC_DefaultNone : StrEnumAttrCase<"none">; -def OpenACC_DefaultPresent : StrEnumAttrCase<"present">; -def OpenACC_DefaultAttr : StrEnumAttr<"DefaultAttr", - "default attribute values", - [OpenACC_DefaultNone, OpenACC_DefaultPresent]> { - let cppNamespace = "::mlir::acc"; -} - //===----------------------------------------------------------------------===// // 2.5.1 parallel Construct //===----------------------------------------------------------------------===// @@ -118,7 +110,7 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel", Variadic:$attachOperands, Variadic:$gangPrivateOperands, Variadic:$gangFirstPrivateOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr); let regions = (region AnyRegion:$region); @@ -190,7 +182,7 @@ def OpenACC_DataOp : OpenACC_Op<"data", Variadic:$presentOperands, Variadic:$deviceptrOperands, Variadic:$attachOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr); let regions = (region AnyRegion:$region); diff --git a/mlir/test/mlir-tblgen/openmp-common.td b/mlir/test/mlir-tblgen/directive-common.td similarity index 83% rename from 
mlir/test/mlir-tblgen/openmp-common.td rename to mlir/test/mlir-tblgen/directive-common.td index 579988beda9432..96439b40e9e8fb 100644 --- a/mlir/test/mlir-tblgen/openmp-common.td +++ b/mlir/test/mlir-tblgen/directive-common.td @@ -2,6 +2,11 @@ include "llvm/Frontend/Directive/DirectiveBase.td" +def TestDirectiveLanguage : DirectiveLanguage { + let name = "Tdl"; + let cppNamespace = "tdl"; +} + def TDLCV_vala : ClauseVal<"vala",1,1> {} def TDLCV_valb : ClauseVal<"valb",2,1> {} def TDLCV_valc : ClauseVal<"valc",3,0> { let isDefault = 1; } @@ -22,5 +27,5 @@ def TDLC_ClauseA : Clause<"clausea"> { // CHECK: "ClauseAKind", // CHECK: "AKind Clause", // CHECK: [AKindvala,AKindvalb]> { -// CHECK: let cppNamespace = "::mlir::omp"; +// CHECK: let cppNamespace = "::mlir::tdl"; // CHECK: } diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt index 119d03573a66dc..32c0d739b51701 100644 --- a/mlir/tools/mlir-tblgen/CMakeLists.txt +++ b/mlir/tools/mlir-tblgen/CMakeLists.txt @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS add_tablegen(mlir-tblgen MLIR DialectGen.cpp + DirectiveCommonGen.cpp EnumsGen.cpp LLVMIRConversionGen.cpp LLVMIRIntrinsicGen.cpp @@ -15,7 +16,6 @@ add_tablegen(mlir-tblgen MLIR OpFormatGen.cpp OpInterfacesGen.cpp OpPythonBindingGen.cpp - OpenMPCommonGen.cpp PassCAPIGen.cpp PassDocGen.cpp PassGen.cpp diff --git a/mlir/tools/mlir-tblgen/OpenMPCommonGen.cpp b/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp similarity index 72% rename from mlir/tools/mlir-tblgen/OpenMPCommonGen.cpp rename to mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp index dbe888e6ab59bd..aaad51794821f7 100644 --- a/mlir/tools/mlir-tblgen/OpenMPCommonGen.cpp +++ b/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp @@ -1,4 +1,4 @@ -//===========- OpenMPCommonGen.cpp - OpenMP common info generator -===========// +//===========- DirectiveCommonGen.cpp - Directive common info generator -=====// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -25,21 +25,26 @@ using llvm::RecordKeeper; using llvm::Twine; // LLVM has multiple places (Clang, Flang, MLIR) where information about -// the OpenMP directives, and clauses are needed. It is good software +// the directives (OpenMP/OpenACC), and clauses are needed. It is good software // engineering to keep the common information in a single place to avoid // duplication, reduce engineering effort and prevent mistakes. -// Currently that common place is llvm/include/llvm/Frontend/OpenMP/OMP.td. +// Currently that common place is llvm/include/llvm/Frontend/OpenMP/OMP.td for +// OpenMP and llvm/include/llvm/Frontend/OpenACC/ACC.td for OpenACC. // We plan to use this tablegen source to generate all the required // declarations, functions etc. // -// Some OpenMP clauses accept only a fixed set of values as inputs. These -// can be represented as a String Enum Attribute (StrEnumAttr) in MLIR ODS. -// The emitDecls function below currently generates these enumerations. The +// Some OpenMP/OpenACC clauses accept only a fixed set of values as inputs. +// These can be represented as a String Enum Attribute (StrEnumAttr) in MLIR +// ODS. The emitDecls function below currently generates these enumerations. The // name of the enumeration is specified in the enumClauseValue field of // Clause record in OMP.td. This name can be used to specify the type of the // OpenMP operation's operand. The allowedClauseValues field provides the list // of ClauseValues which are part of the enumeration. 
static bool emitDecls(const RecordKeeper &recordKeeper, raw_ostream &os) { + const auto &directiveLanguages = + recordKeeper.getAllDerivedDefinitions("DirectiveLanguage"); + assert(directiveLanguages.size() != 0 && "DirectiveLanguage missing."); + const auto &clauses = recordKeeper.getAllDerivedDefinitions("Clause"); for (const auto &r : clauses) { @@ -73,16 +78,17 @@ static bool emitDecls(const RecordKeeper &recordKeeper, raw_ostream &os) { os << ","; } os << "]> {\n"; - os << " let cppNamespace = \"::mlir::omp\";\n"; + os << " let cppNamespace = \"::mlir::" + << directiveLanguages[0]->getValueAsString("cppNamespace") << "\";\n"; os << "}\n"; } return false; } // Registers the generator to mlir-tblgen. -static mlir::GenRegistration - genDirectiveDecls("gen-directive-decl", - "Generate declarations for directives (OpenMP etc.)", - [](const RecordKeeper &records, raw_ostream &os) { - return emitDecls(records, os); - }); +static mlir::GenRegistration genDirectiveDecls( + "gen-directive-decl", + "Generate declarations for directives (OpenMP/OpenACC etc.)", + [](const RecordKeeper &records, raw_ostream &os) { + return emitDecls(records, os); + }); From 07605ea1f3c94e0a23450f5c9ab3902862d7ff03 Mon Sep 17 00:00:00 2001 From: Bevin Hansson Date: Tue, 12 Jan 2021 15:40:36 +0100 Subject: [PATCH 26/86] [X86] Improved lowering for saturating float to int. Adapted from D54696 by @nikic. This patch improves lowering of saturating float to int conversions, FP_TO_[SU]INT_SAT, for X86. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D86079 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 164 +++++++ llvm/lib/Target/X86/X86ISelLowering.h | 1 + llvm/test/CodeGen/X86/fptosi-sat-scalar.ll | 495 +++++++-------------- llvm/test/CodeGen/X86/fptoui-sat-scalar.ll | 441 ++++++------------ 4 files changed, 457 insertions(+), 644 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f28e28689806fa..65b784f3184276 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -284,6 +284,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } + if (Subtarget.hasSSE2()) { + // Custom lowering for saturating float to int conversions. + // We handle promotion to larger result types manually. + for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) { + setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); + setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); + } + if (Subtarget.is64Bit()) { + setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); + } + } + // Handle address space casts between mixed sized pointers. setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); @@ -21428,6 +21441,155 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N, return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI); } +SDValue +X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const { + // This is based on the TargetLowering::expandFP_TO_INT_SAT implementation, + // but making use of X86 specifics to produce better instruction sequences. + SDNode *Node = Op.getNode(); + bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT; + unsigned FpToIntOpcode = IsSigned ? 
ISD::FP_TO_SINT : ISD::FP_TO_UINT; + SDLoc dl(SDValue(Node, 0)); + SDValue Src = Node->getOperand(0); + + // There are three types involved here: SrcVT is the source floating point + // type, DstVT is the type of the result, and TmpVT is the result of the + // intermediate FP_TO_*INT operation we'll use (which may be a promotion of + // DstVT). + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); + EVT TmpVT = DstVT; + + // This code is only for floats and doubles. Fall back to generic code for + // anything else. + if (!isScalarFPTypeInSSEReg(SrcVT)) + return SDValue(); + + unsigned SatWidth = Node->getConstantOperandVal(1); + unsigned DstWidth = DstVT.getScalarSizeInBits(); + unsigned TmpWidth = TmpVT.getScalarSizeInBits(); + assert(SatWidth <= DstWidth && SatWidth <= TmpWidth && + "Expected saturation width smaller than result width"); + + // Promote result of FP_TO_*INT to at least 32 bits. + if (TmpWidth < 32) { + TmpVT = MVT::i32; + TmpWidth = 32; + } + + // Promote conversions to unsigned 32-bit to 64-bit, because it will allow + // us to use a native signed conversion instead. + if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) { + TmpVT = MVT::i64; + TmpWidth = 64; + } + + // If the saturation width is smaller than the size of the temporary result, + // we can always use signed conversion, which is native. + if (SatWidth < TmpWidth) + FpToIntOpcode = ISD::FP_TO_SINT; + + // Determine minimum and maximum integer values and their corresponding + // floating-point values. 
+ APInt MinInt, MaxInt; + if (IsSigned) { + MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth); + MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth); + } else { + MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth); + MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth); + } + + APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT)); + APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT)); + + APFloat::opStatus MinStatus = MinFloat.convertFromAPInt( + MinInt, IsSigned, APFloat::rmTowardZero); + APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt( + MaxInt, IsSigned, APFloat::rmTowardZero); + bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) + && !(MaxStatus & APFloat::opStatus::opInexact); + + SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT); + SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT); + + // If the integer bounds are exactly representable as floats, emit a + // min+max+fptoi sequence. Otherwise use comparisons and selects. + if (AreExactFloatBounds) { + if (DstVT != TmpVT) { + // Clamp by MinFloat from below. If Src is NaN, propagate NaN. + SDValue MinClamped = DAG.getNode( + X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src); + // Clamp by MaxFloat from above. If Src is NaN, propagate NaN. + SDValue BothClamped = DAG.getNode( + X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped); + // Convert clamped value to integer. + SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped); + + // NaN will become INDVAL, with the top bit set and the rest zero. + // Truncation will discard the top bit, resulting in zero. + return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt); + } + + // Clamp by MinFloat from below. If Src is NaN, the result is MinFloat. + SDValue MinClamped = DAG.getNode( + X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode); + // Clamp by MaxFloat from above. NaN cannot occur. 
+ SDValue BothClamped = DAG.getNode( + X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode); + // Convert clamped value to integer. + SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped); + + if (!IsSigned) { + // In the unsigned case we're done, because we mapped NaN to MinFloat, + // which is zero. + return FpToInt; + } + + // Otherwise, select zero if Src is NaN. + SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); + return DAG.getSelectCC( + dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); + } + + SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT); + SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT); + + // Result of direct conversion, which may be selected away. + SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src); + + if (DstVT != TmpVT) { + // NaN will become INDVAL, with the top bit set and the rest zero. + // Truncation will discard the top bit, resulting in zero. + FpToInt = DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt); + } + + SDValue Select = FpToInt; + // For signed conversions where we saturate to the same size as the + // result type of the fptoi instructions, INDVAL coincides with integer + // minimum, so we don't need to explicitly check it. + if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) { + // If Src ULT MinFloat, select MinInt. In particular, this also selects + // MinInt if Src is NaN. + Select = DAG.getSelectCC( + dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT); + } + + // If Src OGT MaxFloat, select MaxInt. + Select = DAG.getSelectCC( + dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT); + + // In the unsigned case we are done, because we mapped NaN to MinInt, which + // is already zero. The promoted case was already handled above. + if (!IsSigned || DstVT != TmpVT) { + return Select; + } + + // Otherwise, select 0 if Src is NaN. 
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); + return DAG.getSelectCC( + dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO); +} + SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { bool IsStrict = Op->isStrictFPOpcode(); @@ -29807,6 +29969,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG); case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::FP_ROUND: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 668132239dd3eb..8b71c8394c01ab 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1499,6 +1499,7 @@ namespace llvm { SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll index f7a26c6a90b7da..23035a2f7e4092 100644 --- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll @@ -73,31 +73,20 @@ define i1 @test_signed_i1_f32(float %f) nounwind { ; X86-SSE-LABEL: test_signed_i1_f32: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $255, %eax -; X86-SSE-NEXT: cmovael %ecx, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: 
maxss {{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmoval %ecx, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ecx, %eax +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i1_f32: ; X64: # %bb.0: -; X64-NEXT: cvttss2si %xmm0, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovael %ecx, %eax -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmoval %ecx, %eax -; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovpl %ecx, %eax +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %x = call i1 @llvm.fptosi.sat.i1.f32(float %f) @@ -155,31 +144,20 @@ define i8 @test_signed_i8_f32(float %f) nounwind { ; X86-SSE-LABEL: test_signed_i8_f32: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $128, %ecx -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $127, %edx -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i8_f32: ; X64: # %bb.0: +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: maxss 
%xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 ; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $128, %ecx -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $127, %edx -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %x = call i8 @llvm.fptosi.sat.i8.f32(float %f) @@ -238,31 +216,20 @@ define i13 @test_signed_i13_f32(float %f) nounwind { ; X86-SSE-LABEL: test_signed_i13_f32: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $61440, %ecx # imm = 0xF000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $4095, %edx # imm = 0xFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i13_f32: ; X64: # %bb.0: +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 ; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $61440, %ecx # imm = 0xF000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $4095, %edx # imm = 0xFFF -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, 
%eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %x = call i13 @llvm.fptosi.sat.i13.f32(float %f) @@ -321,31 +288,20 @@ define i16 @test_signed_i16_f32(float %f) nounwind { ; X86-SSE-LABEL: test_signed_i16_f32: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $32767, %edx # imm = 0x7FFF -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i16_f32: ; X64: # %bb.0: +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 ; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $32768, %ecx # imm = 0x8000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $32767, %edx # imm = 0x7FFF -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %x = call i16 @llvm.fptosi.sat.i16.f32(float %f) @@ -404,30 +360,22 @@ define i19 @test_signed_i19_f32(float %f) nounwind { ; X86-SSE-LABEL: test_signed_i19_f32: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl 
$-262144, %ecx # imm = 0xFFFC0000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $262143, %edx # imm = 0x3FFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx ; X86-SSE-NEXT: xorl %eax, %eax ; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxss {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: minss {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvttss2si %xmm0, %ecx +; X86-SSE-NEXT: cmovnpl %ecx, %eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i19_f32: ; X64: # %bb.0: -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $262143, %edx # imm = 0x3FFFF -; X64-NEXT: cmovbel %ecx, %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax +; X64-NEXT: maxss {{.*}}(%rip), %xmm0 +; X64-NEXT: minss {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttss2si %xmm0, %ecx +; X64-NEXT: cmovnpl %ecx, %eax ; X64-NEXT: retq %x = call i19 @llvm.fptosi.sat.i19.f32(float %f) ret i19 %x @@ -487,28 +435,22 @@ define i32 @test_signed_i32_f32(float %f) nounwind { ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: cvttss2si %xmm0, %eax ; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx +; X86-SSE-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-SSE-NEXT: cmovbel %eax, %ecx ; X86-SSE-NEXT: xorl %eax, %eax ; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: cmovnpl %ecx, %eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i32_f32: ; X64: # %bb.0: ; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl 
$-2147483648, %ecx # imm = 0x80000000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X64-NEXT: cmovbel %ecx, %edx +; X64-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-NEXT: cmovbel %eax, %ecx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax +; X64-NEXT: cmovnpl %ecx, %eax ; X64-NEXT: retq %x = call i32 @llvm.fptosi.sat.i32.f32(float %f) ret i32 %x @@ -731,14 +673,11 @@ define i64 @test_signed_i64_f32(float %f) nounwind { ; X64: # %bb.0: ; X64-NEXT: cvttss2si %xmm0, %rax ; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: cmovaeq %rax, %rcx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: cmovbeq %rcx, %rdx +; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NEXT: cmovbeq %rax, %rcx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpq %rdx, %rax +; X64-NEXT: cmovnpq %rcx, %rax ; X64-NEXT: retq %x = call i64 @llvm.fptosi.sat.i64.f32(float %f) ret i64 %x @@ -1154,31 +1093,20 @@ define i1 @test_signed_i1_f64(double %f) nounwind { ; X86-SSE-LABEL: test_signed_i1_f64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $255, %eax -; X86-SSE-NEXT: cmovael %ecx, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 -; X86-SSE-NEXT: cmoval %ecx, %eax -; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ecx, %eax +; X86-SSE-NEXT: minsd %xmm0, %xmm1 +; X86-SSE-NEXT: cvttsd2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i1_f64: 
; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovael %ecx, %eax -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmoval %ecx, %eax -; X64-NEXT: ucomisd %xmm0, %xmm0 -; X64-NEXT: cmovpl %ecx, %eax +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: xorpd %xmm0, %xmm0 +; X64-NEXT: minsd %xmm1, %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %x = call i1 @llvm.fptosi.sat.i1.f64(double %f) @@ -1236,31 +1164,20 @@ define i8 @test_signed_i8_f64(double %f) nounwind { ; X86-SSE-LABEL: test_signed_i8_f64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $128, %ecx -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $127, %edx -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE-NEXT: minsd %xmm0, %xmm1 +; X86-SSE-NEXT: cvttsd2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i8_f64: ; X64: # %bb.0: +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: minsd %xmm1, %xmm0 ; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $128, %ecx -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $127, %edx -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomisd %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax ; X64-NEXT: # 
kill: def $al killed $al killed $eax ; X64-NEXT: retq %x = call i8 @llvm.fptosi.sat.i8.f64(double %f) @@ -1319,31 +1236,20 @@ define i13 @test_signed_i13_f64(double %f) nounwind { ; X86-SSE-LABEL: test_signed_i13_f64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $61440, %ecx # imm = 0xF000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $4095, %edx # imm = 0xFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE-NEXT: minsd %xmm0, %xmm1 +; X86-SSE-NEXT: cvttsd2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i13_f64: ; X64: # %bb.0: +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: minsd %xmm1, %xmm0 ; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $61440, %ecx # imm = 0xF000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $4095, %edx # imm = 0xFFF -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomisd %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %x = call i13 @llvm.fptosi.sat.i13.f64(double %f) @@ -1402,31 +1308,20 @@ define i16 @test_signed_i16_f64(double %f) nounwind { ; X86-SSE-LABEL: test_signed_i16_f64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; 
X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $32767, %edx # imm = 0x7FFF -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE-NEXT: minsd %xmm0, %xmm1 +; X86-SSE-NEXT: cvttsd2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i16_f64: ; X64: # %bb.0: +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: minsd %xmm1, %xmm0 ; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $32768, %ecx # imm = 0x8000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $32767, %edx # imm = 0x7FFF -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomisd %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %x = call i16 @llvm.fptosi.sat.i16.f64(double %f) @@ -1485,30 +1380,22 @@ define i19 @test_signed_i19_f64(double %f) nounwind { ; X86-SSE-LABEL: test_signed_i19_f64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $262143, %edx # imm = 0x3FFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx ; X86-SSE-NEXT: xorl %eax, %eax ; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: minsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx +; X86-SSE-NEXT: cmovnpl %ecx, %eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i19_f64: ; 
X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $262143, %edx # imm = 0x3FFFF -; X64-NEXT: cmovbel %ecx, %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomisd %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax +; X64-NEXT: maxsd {{.*}}(%rip), %xmm0 +; X64-NEXT: minsd {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %ecx +; X64-NEXT: cmovnpl %ecx, %eax ; X64-NEXT: retq %x = call i19 @llvm.fptosi.sat.i19.f64(double %f) ret i19 %x @@ -1566,30 +1453,22 @@ define i32 @test_signed_i32_f64(double %f) nounwind { ; X86-SSE-LABEL: test_signed_i32_f64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx ; X86-SSE-NEXT: xorl %eax, %eax ; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: minsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx +; X86-SSE-NEXT: cmovnpl %ecx, %eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_signed_i32_f64: ; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X64-NEXT: cmovbel %ecx, %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomisd %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax +; X64-NEXT: maxsd {{.*}}(%rip), %xmm0 +; X64-NEXT: minsd {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %ecx +; X64-NEXT: cmovnpl %ecx, %eax ; X64-NEXT: 
retq %x = call i32 @llvm.fptosi.sat.i32.f64(double %f) ret i32 %x @@ -1695,16 +1574,12 @@ define i50 @test_signed_i50_f64(double %f) nounwind { ; ; X64-LABEL: test_signed_i50_f64: ; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %rax -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $-562949953421312, %rcx # imm = 0xFFFE000000000000 -; X64-NEXT: cmovaeq %rax, %rcx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $562949953421311, %rdx # imm = 0x1FFFFFFFFFFFF -; X64-NEXT: cmovbeq %rcx, %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomisd %xmm0, %xmm0 -; X64-NEXT: cmovnpq %rdx, %rax +; X64-NEXT: maxsd {{.*}}(%rip), %xmm0 +; X64-NEXT: minsd {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %rcx +; X64-NEXT: cmovnpq %rcx, %rax ; X64-NEXT: retq %x = call i50 @llvm.fptosi.sat.i50.f64(double %f) ret i50 %x @@ -1812,14 +1687,11 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; X64: # %bb.0: ; X64-NEXT: cvttsd2si %xmm0, %rax ; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: cmovaeq %rax, %rcx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: cmovbeq %rcx, %rdx +; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NEXT: cmovbeq %rax, %rcx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomisd %xmm0, %xmm0 -; X64-NEXT: cmovnpq %rdx, %rax +; X64-NEXT: cmovnpq %rcx, %rax ; X64-NEXT: retq %x = call i64 @llvm.fptosi.sat.i64.f64(double %f) ret i64 %x @@ -2242,16 +2114,10 @@ define i1 @test_signed_i1_f16(half %f) nounwind { ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $255, %eax -; X86-SSE-NEXT: cmovael %ecx, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: maxss 
{{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmoval %ecx, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ecx, %eax +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl @@ -2261,16 +2127,11 @@ define i1 @test_signed_i1_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT -; X64-NEXT: cvttss2si %xmm0, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovael %ecx, %eax -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmoval %ecx, %eax -; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovpl %ecx, %eax +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq @@ -2336,16 +2197,10 @@ define i8 @test_signed_i8_f16(half %f) nounwind { ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $128, %ecx -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $127, %edx -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: addl 
$12, %esp ; X86-SSE-NEXT: retl @@ -2355,16 +2210,11 @@ define i8 @test_signed_i8_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 ; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $128, %ecx -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $127, %edx -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq @@ -2431,16 +2281,10 @@ define i13 @test_signed_i13_f16(half %f) nounwind { ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $61440, %ecx # imm = 0xF000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $4095, %edx # imm = 0xFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl @@ -2450,16 +2294,11 @@ define i13 @test_signed_i13_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 ; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $61440, %ecx # imm = 0xF000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $4095, %edx # imm = 0xFFF -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq @@ -2526,16 +2365,10 @@ define i16 @test_signed_i16_f16(half %f) nounwind { ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $32767, %edx # imm = 0x7FFF -; X86-SSE-NEXT: cmovbel %ecx, %edx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl @@ -2545,16 +2378,11 @@ define i16 @test_signed_i16_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 ; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $32768, %ecx # imm = 0x8000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $32767, %edx # imm = 
0x7FFF -; X64-NEXT: cmovbel %ecx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq @@ -2621,16 +2449,12 @@ define i19 @test_signed_i19_f16(half %f) nounwind { ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $262143, %edx # imm = 0x3FFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx ; X86-SSE-NEXT: xorl %eax, %eax ; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: maxss {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: minss {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvttss2si %xmm0, %ecx +; X86-SSE-NEXT: cmovnpl %ecx, %eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl ; @@ -2639,16 +2463,12 @@ define i19 @test_signed_i19_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $262143, %edx # imm = 0x3FFFF -; X64-NEXT: cmovbel %ecx, %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax +; X64-NEXT: maxss {{.*}}(%rip), %xmm0 +; X64-NEXT: minss {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttss2si %xmm0, %ecx +; X64-NEXT: cmovnpl %ecx, %eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq %x = call i19 @llvm.fptosi.sat.i19.f16(half %f) @@ -2716,14 +2536,11 @@ define i32 @test_signed_i32_f16(half %f) nounwind { ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: cvttss2si 
%xmm0, %eax ; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-SSE-NEXT: cmovbel %ecx, %edx +; X86-SSE-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-SSE-NEXT: cmovbel %eax, %ecx ; X86-SSE-NEXT: xorl %eax, %eax ; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovnpl %edx, %eax +; X86-SSE-NEXT: cmovnpl %ecx, %eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl ; @@ -2734,14 +2551,11 @@ define i32 @test_signed_i32_f16(half %f) nounwind { ; X64-NEXT: callq __gnu_h2f_ieee@PLT ; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X64-NEXT: cmovbel %ecx, %edx +; X64-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-NEXT: cmovbel %eax, %ecx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpl %edx, %eax +; X64-NEXT: cmovnpl %ecx, %eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq %x = call i32 @llvm.fptosi.sat.i32.f16(half %f) @@ -2984,14 +2798,11 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; X64-NEXT: callq __gnu_h2f_ieee@PLT ; X64-NEXT: cvttss2si %xmm0, %rax ; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: cmovaeq %rax, %rcx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: cmovbeq %rcx, %rdx +; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NEXT: cmovbeq %rax, %rcx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm0, %xmm0 -; X64-NEXT: cmovnpq %rdx, %rax +; X64-NEXT: cmovnpq %rcx, %rax ; X64-NEXT: popq %rcx ; X64-NEXT: retq 
%x = call i64 @llvm.fptosi.sat.i64.f16(half %f) diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll index 3b74e639eb4223..294189815c490a 100644 --- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll @@ -60,28 +60,21 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind { ; ; X86-SSE-LABEL: test_unsigned_i1_f32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $1, %eax -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i1_f32: ; X64: # %bb.0: -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %x = call i1 @llvm.fptoui.sat.i1.f32(float %f) @@ -130,28 +123,21 @@ define i8 @test_unsigned_i8_f32(float %f) nounwind { ; ; X86-SSE-LABEL: test_unsigned_i8_f32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, 
%xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $255, %eax -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i8_f32: ; X64: # %bb.0: -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %x = call i8 @llvm.fptoui.sat.i8.f32(float %f) @@ -199,28 +185,21 @@ define i13 @test_unsigned_i13_f32(float %f) nounwind { ; ; X86-SSE-LABEL: test_unsigned_i13_f32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $8191, %eax # imm = 0x1FFF -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i13_f32: ; X64: # %bb.0: -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; 
X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $8191, %eax # imm = 0x1FFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %x = call i13 @llvm.fptoui.sat.i13.f32(float %f) @@ -268,28 +247,21 @@ define i16 @test_unsigned_i16_f32(float %f) nounwind { ; ; X86-SSE-LABEL: test_unsigned_i16_f32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $65535, %eax # imm = 0xFFFF -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i16_f32: ; X64: # %bb.0: -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $65535, %eax # imm = 0xFFFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %x = call i16 @llvm.fptoui.sat.i16.f32(float %f) @@ -338,33 +310,18 @@ define i19 @test_unsigned_i19_f32(float %f) nounwind { ; X86-SSE-LABEL: test_unsigned_i19_f32: ; X86-SSE: # 
%bb.0: ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movaps %xmm0, %xmm2 -; X86-SSE-NEXT: subss %xmm1, %xmm2 -; X86-SSE-NEXT: cvttss2si %xmm2, %eax -; X86-SSE-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 -; X86-SSE-NEXT: cvttss2si %xmm0, %ecx -; X86-SSE-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE-NEXT: cmovbel %eax, %ecx -; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %ecx, %edx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $524287, %eax # imm = 0x7FFFF -; X86-SSE-NEXT: cmovbel %edx, %eax +; X86-SSE-NEXT: maxss %xmm1, %xmm0 +; X86-SSE-NEXT: minss {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvttss2si %xmm0, %eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i19_f32: ; X64: # %bb.0: -; X64-NEXT: cvttss2si %xmm0, %rax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $524287, %eax # imm = 0x7FFFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm1, %xmm0 +; X64-NEXT: minss {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: retq %x = call i19 @llvm.fptoui.sat.i19.f32(float %f) ret i19 %x @@ -557,15 +514,7 @@ define i50 @test_unsigned_i50_f32(float %f) nounwind { ; ; X64-LABEL: test_unsigned_i50_f32: ; X64: # %bb.0: -; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64-NEXT: movaps %xmm0, %xmm2 -; X64-NEXT: subss %xmm1, %xmm2 -; X64-NEXT: cvttss2si %xmm2, %rax -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: xorq %rax, %rcx ; X64-NEXT: cvttss2si %xmm0, %rax -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovaeq %rcx, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 ; X64-NEXT: ucomiss %xmm1, %xmm0 @@ -1047,28 +996,21 @@ define i1 @test_unsigned_i1_f64(double %f) 
nounwind { ; ; X86-SSE-LABEL: test_unsigned_i1_f64: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $1, %eax -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorpd %xmm0, %xmm0 +; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE-NEXT: minsd %xmm0, %xmm1 +; X86-SSE-NEXT: cvttsd2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i1_f64: ; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: minsd %xmm1, %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %x = call i1 @llvm.fptoui.sat.i1.f64(double %f) @@ -1117,28 +1059,21 @@ define i8 @test_unsigned_i8_f64(double %f) nounwind { ; ; X86-SSE-LABEL: test_unsigned_i8_f64: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $255, %eax -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorpd %xmm0, %xmm0 +; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE-NEXT: minsd %xmm0, %xmm1 +; X86-SSE-NEXT: cvttsd2si %xmm1, %eax ; X86-SSE-NEXT: # kill: 
def $al killed $al killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i8_f64: ; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: minsd %xmm1, %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %x = call i8 @llvm.fptoui.sat.i8.f64(double %f) @@ -1186,28 +1121,21 @@ define i13 @test_unsigned_i13_f64(double %f) nounwind { ; ; X86-SSE-LABEL: test_unsigned_i13_f64: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $8191, %eax # imm = 0x1FFF -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorpd %xmm0, %xmm0 +; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE-NEXT: minsd %xmm0, %xmm1 +; X86-SSE-NEXT: cvttsd2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i13_f64: ; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $8191, %eax # imm = 0x1FFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: minsd %xmm1, %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %x = call i13 
@llvm.fptoui.sat.i13.f64(double %f) @@ -1255,28 +1183,21 @@ define i16 @test_unsigned_i16_f64(double %f) nounwind { ; ; X86-SSE-LABEL: test_unsigned_i16_f64: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: cvttsd2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $65535, %eax # imm = 0xFFFF -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorpd %xmm0, %xmm0 +; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE-NEXT: minsd %xmm0, %xmm1 +; X86-SSE-NEXT: cvttsd2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i16_f64: ; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $65535, %eax # imm = 0xFFFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: minsd %xmm1, %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %x = call i16 @llvm.fptoui.sat.i16.f64(double %f) @@ -1325,33 +1246,18 @@ define i19 @test_unsigned_i19_f64(double %f) nounwind { ; X86-SSE-LABEL: test_unsigned_i19_f64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE-NEXT: movapd %xmm0, %xmm2 -; X86-SSE-NEXT: subsd %xmm1, %xmm2 -; X86-SSE-NEXT: cvttsd2si %xmm2, %eax -; X86-SSE-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 -; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx -; X86-SSE-NEXT: ucomisd %xmm0, %xmm1 -; X86-SSE-NEXT: cmovbel %eax, %ecx -; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: 
xorpd %xmm1, %xmm1 -; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %ecx, %edx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $524287, %eax # imm = 0x7FFFF -; X86-SSE-NEXT: cmovbel %edx, %eax +; X86-SSE-NEXT: maxsd %xmm1, %xmm0 +; X86-SSE-NEXT: minsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvttsd2si %xmm0, %eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i19_f64: ; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %rax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $524287, %eax # imm = 0x7FFFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxsd %xmm1, %xmm0 +; X64-NEXT: minsd {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %eax ; X64-NEXT: retq %x = call i19 @llvm.fptoui.sat.i19.f64(double %f) ret i19 %x @@ -1399,33 +1305,27 @@ define i32 @test_unsigned_i32_f64(double %f) nounwind { ; X86-SSE-LABEL: test_unsigned_i32_f64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: xorpd %xmm1, %xmm1 +; X86-SSE-NEXT: maxsd %xmm1, %xmm0 +; X86-SSE-NEXT: minsd {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx ; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X86-SSE-NEXT: movapd %xmm0, %xmm2 ; X86-SSE-NEXT: subsd %xmm1, %xmm2 ; X86-SSE-NEXT: cvttsd2si %xmm2, %eax ; X86-SSE-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 -; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx -; X86-SSE-NEXT: ucomisd %xmm0, %xmm1 -; X86-SSE-NEXT: cmovbel %eax, %ecx -; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: xorpd %xmm1, %xmm1 ; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %ecx, %edx -; X86-SSE-NEXT: ucomisd {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $-1, %eax -; X86-SSE-NEXT: cmovbel %edx, %eax +; X86-SSE-NEXT: cmovbl %ecx, %eax ; X86-SSE-NEXT: retl ; ; X64-LABEL: test_unsigned_i32_f64: ; X64: # %bb.0: -; X64-NEXT: cvttsd2si %xmm0, %rax -; X64-NEXT: xorl %ecx, %ecx ; 
X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: minsd %xmm1, %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %x = call i32 @llvm.fptoui.sat.i32.f64(double %f) ret i32 %x @@ -1544,22 +1444,10 @@ define i50 @test_unsigned_i50_f64(double %f) nounwind { ; ; X64-LABEL: test_unsigned_i50_f64: ; X64: # %bb.0: -; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X64-NEXT: movapd %xmm0, %xmm2 -; X64-NEXT: subsd %xmm1, %xmm2 -; X64-NEXT: cvttsd2si %xmm2, %rax -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: xorq %rax, %rcx -; X64-NEXT: cvttsd2si %xmm0, %rax -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmovaeq %rcx, %rax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: cmovaeq %rax, %rcx -; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0 -; X64-NEXT: movabsq $1125899906842623, %rax # imm = 0x3FFFFFFFFFFFF -; X64-NEXT: cmovbeq %rcx, %rax +; X64-NEXT: maxsd %xmm1, %xmm0 +; X64-NEXT: minsd {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttsd2si %xmm0, %rax ; X64-NEXT: retq %x = call i50 @llvm.fptoui.sat.i50.f64(double %f) ret i50 %x @@ -2041,15 +1929,11 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind { ; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $1, %eax -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; 
X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl @@ -2059,14 +1943,11 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq @@ -2123,15 +2004,11 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind { ; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $255, %eax -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $al killed $al killed $eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl @@ -2141,14 +2018,11 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT -; 
X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq @@ -2204,15 +2078,11 @@ define i13 @test_unsigned_i13_f16(half %f) nounwind { ; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $8191, %eax # imm = 0x1FFF -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl @@ -2222,14 +2092,11 @@ define i13 @test_unsigned_i13_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $8191, %eax # imm = 0x1FFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; 
X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq @@ -2285,15 +2152,11 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind { ; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cvttss2si %xmm0, %eax -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $65535, %eax # imm = 0xFFFF -; X86-SSE-NEXT: cmovbel %ecx, %eax +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: minss %xmm0, %xmm1 +; X86-SSE-NEXT: cvttss2si %xmm1, %eax ; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl @@ -2303,14 +2166,11 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT -; X64-NEXT: cvttss2si %xmm0, %eax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $65535, %eax # imm = 0xFFFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: minss %xmm1, %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq @@ -2367,21 +2227,10 @@ define i19 @test_unsigned_i19_f16(half %f) nounwind { ; X86-SSE-NEXT: calll __gnu_h2f_ieee ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movaps %xmm0, %xmm2 -; X86-SSE-NEXT: 
subss %xmm1, %xmm2 -; X86-SSE-NEXT: cvttss2si %xmm2, %eax -; X86-SSE-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 -; X86-SSE-NEXT: cvttss2si %xmm0, %ecx -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %eax, %ecx -; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 -; X86-SSE-NEXT: cmovael %ecx, %edx -; X86-SSE-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 -; X86-SSE-NEXT: movl $524287, %eax # imm = 0x7FFFF -; X86-SSE-NEXT: cmovbel %edx, %eax +; X86-SSE-NEXT: maxss %xmm1, %xmm0 +; X86-SSE-NEXT: minss {{\.LCPI.*}}, %xmm0 +; X86-SSE-NEXT: cvttss2si %xmm0, %eax ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: retl ; @@ -2390,14 +2239,10 @@ define i19 @test_unsigned_i19_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT -; X64-NEXT: cvttss2si %xmm0, %rax -; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0 -; X64-NEXT: movl $524287, %eax # imm = 0x7FFFF -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: maxss %xmm1, %xmm0 +; X64-NEXT: minss {{.*}}(%rip), %xmm0 +; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq %x = call i19 @llvm.fptoui.sat.i19.f16(half %f) @@ -2612,15 +2457,7 @@ define i50 @test_unsigned_i50_f16(half %f) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movzwl %di, %edi ; X64-NEXT: callq __gnu_h2f_ieee@PLT -; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64-NEXT: movaps %xmm0, %xmm2 -; X64-NEXT: subss %xmm1, %xmm2 -; X64-NEXT: cvttss2si %xmm2, %rax -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: xorq %rax, %rcx ; X64-NEXT: cvttss2si %xmm0, %rax -; X64-NEXT: ucomiss %xmm1, %xmm0 -; X64-NEXT: cmovaeq %rcx, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorps %xmm1, %xmm1 ; X64-NEXT: ucomiss %xmm1, %xmm0 From 24faa87075ac1b4e4d7f7336271b1ba72ffcc2c0 Mon Sep 17 
00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Tue, 12 Jan 2021 21:36:55 +0900 Subject: [PATCH 27/86] [VE] Update VELIntrinsic tests Update comment and style of regression tests for VELIntrinsic Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D94490 --- llvm/test/CodeGen/VE/VELIntrinsics/vand.ll | 4 +- llvm/test/CodeGen/VE/VELIntrinsics/vld.ll | 130 ++++++++++----------- 2 files changed, 67 insertions(+), 67 deletions(-) diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vand.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vand.ll index 17ab5d97b361cf..749ff16cd49c26 100644 --- a/llvm/test/CodeGen/VE/VELIntrinsics/vand.ll +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vand.ll @@ -4,8 +4,8 @@ ;;; ;;; Note: ;;; We test VAND*vvl, VAND*vvl_v, VAND*rvl, VAND*rvl_v, VAND*vvml_v, -;;; VAND*rvml_v, PVAND*vvl, PVAND*vvl_v, PVAND*rvl, PVAND*rvl_v, PVAND*vvml_v, and -;;; PVAND*rvml_v instructions. +;;; VAND*rvml_v, PVAND*vvl, PVAND*vvl_v, PVAND*rvl, PVAND*rvl_v, PVAND*vvml_v, +;;; and PVAND*rvml_v instructions. ; Function Attrs: nounwind readnone define fastcc <256 x double> @vand_vvvl(<256 x double> %0, <256 x double> %1) { diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vld.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vld.ll index d92666194edf70..6068b816f911a2 100644 --- a/llvm/test/CodeGen/VE/VELIntrinsics/vld.ll +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vld.ll @@ -3,7 +3,7 @@ ;;; Test vector load intrinsic instructions ;;; ;;; Note: -;;; We test VLD*rrl, VLD*irl, VLD*rrl_v, VLD*irl_v +;;; We test VLD*rrl, VLD*irl, VLD*rrl_v, and VLD*irl_v instructions. 
; Function Attrs: nounwind define void @vld_vssl(i8* %0, i64 %1) { @@ -17,7 +17,7 @@ define void @vld_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -38,7 +38,7 @@ define void @vld_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vld.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -57,7 +57,7 @@ define void @vld_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -75,7 +75,7 @@ define void @vld_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vld.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -91,7 +91,7 @@ define void @vldnc_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", 
"v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -112,7 +112,7 @@ define void @vldnc_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -131,7 +131,7 @@ define void @vldnc_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -149,7 +149,7 @@ define void @vldnc_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -165,7 +165,7 @@ define void @vldu_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -186,7 +186,7 @@ define void @vldu_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> 
@llvm.ve.vl.vldu.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -205,7 +205,7 @@ define void @vldu_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -223,7 +223,7 @@ define void @vldu_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -239,7 +239,7 @@ define void @vldunc_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -260,7 +260,7 @@ define void @vldunc_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 
$2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -279,7 +279,7 @@ define void @vldunc_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -297,7 +297,7 @@ define void @vldunc_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -313,7 +313,7 @@ define void @vldlsx_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -334,7 +334,7 @@ define void @vldlsx_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -353,7 +353,7 @@ define void @vldlsx_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 8, i8* %0, i32 256) - tail call void asm 
sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -371,7 +371,7 @@ define void @vldlsx_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -387,7 +387,7 @@ define void @vldlsxnc_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -408,7 +408,7 @@ define void @vldlsxnc_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -427,7 +427,7 @@ define void @vldlsxnc_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -445,7 +445,7 @@ define void @vldlsxnc_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = 
tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -461,7 +461,7 @@ define void @vldlzx_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -482,7 +482,7 @@ define void @vldlzx_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -501,7 +501,7 @@ define void @vldlzx_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -519,7 +519,7 @@ define void @vldlzx_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail 
call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -535,7 +535,7 @@ define void @vldlzxnc_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -556,7 +556,7 @@ define void @vldlzxnc_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -575,7 +575,7 @@ define void @vldlzxnc_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -593,7 +593,7 @@ define void @vldlzxnc_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -609,7 +609,7 @@ define void @vld2d_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> 
@llvm.ve.vl.vld2d.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -630,7 +630,7 @@ define void @vld2d_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -649,7 +649,7 @@ define void @vld2d_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -667,7 +667,7 @@ define void @vld2d_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -683,7 +683,7 @@ define void @vld2dnc_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -704,7 +704,7 @@ define void 
@vld2dnc_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -723,7 +723,7 @@ define void @vld2dnc_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -741,7 +741,7 @@ define void @vld2dnc_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -757,7 +757,7 @@ define void @vldu2d_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -778,7 +778,7 @@ define void @vldu2d_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call 
void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -797,7 +797,7 @@ define void @vldu2d_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -815,7 +815,7 @@ define void @vldu2d_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -831,7 +831,7 @@ define void @vldu2dnc_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -852,7 +852,7 @@ define void @vldu2dnc_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -871,7 +871,7 @@ define void @vldu2dnc_vssl_imm(i8* %0) { ; CHECK-NEXT: 
#NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -889,7 +889,7 @@ define void @vldu2dnc_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -905,7 +905,7 @@ define void @vldl2dsx_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -926,7 +926,7 @@ define void @vldl2dsx_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -945,7 +945,7 @@ define void @vldl2dsx_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", 
"v,r"(<256 x double> %2, i8* %0) ret void } @@ -963,7 +963,7 @@ define void @vldl2dsx_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -979,7 +979,7 @@ define void @vldl2dsxnc_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -1000,7 +1000,7 @@ define void @vldl2dsxnc_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -1019,7 +1019,7 @@ define void @vldl2dsxnc_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -1037,7 +1037,7 @@ define void @vldl2dsxnc_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 8, i8* %1, i32 256) %4 = 
tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } @@ -1053,7 +1053,7 @@ define void @vldl2dzx_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -1074,7 +1074,7 @@ define void @vldl2dzx_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -1093,7 +1093,7 @@ define void @vldl2dzx_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -1111,7 +1111,7 @@ define void @vldl2dzx_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 
x double> %4, i8* %0) ret void } @@ -1127,7 +1127,7 @@ define void @vldl2dzxnc_vssl(i8* %0, i64 %1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 %1, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %3, i8* %0, i64 %1) ret void } @@ -1148,7 +1148,7 @@ define void @vldl2dzxnc_vssvl(i8* %0, i64 %1, i8* %2) { ; CHECK-NEXT: b.l.t (, %s10) %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 %1, i8* %2, i32 256) %5 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) - tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + tail call void asm sideeffect "vst ${0:v}, $2, $1", "v,r,r"(<256 x double> %5, i8* %0, i64 %1) ret void } @@ -1167,7 +1167,7 @@ define void @vldl2dzxnc_vssl_imm(i8* %0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 8, i8* %0, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %2, i8* %0) ret void } @@ -1185,6 +1185,6 @@ define void @vldl2dzxnc_vssvl_imm(i8* %0, i8* %1) { ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 8, i8* %1, i32 256) %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) - tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %4, i8* %0) ret void } From 2f7ec77e3cdb9d874e0b6a73d3d7a6fe02c0125d Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Mon, 11 Jan 2021 09:58:31 -0500 Subject: [PATCH 28/86] [mlir][spirv] NFC: place ops in the proper file 
for their categories This commit moves dangling ops in the main ops.td file to the proper file matching their categories. This makes ops.td as purely including all category files. Differential Revision: https://reviews.llvm.org/D94413 --- .../mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td | 141 ++ .../mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td | 296 ++++ .../mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td | 61 + .../include/mlir/Dialect/SPIRV/IR/SPIRVOps.td | 494 +----- .../Dialect/SPIRV/IR/SPIRVStructureOps.td | 50 + mlir/test/Dialect/SPIRV/IR/barrier-ops.mlir | 49 + mlir/test/Dialect/SPIRV/IR/bit-ops.mlir | 130 ++ mlir/test/Dialect/SPIRV/IR/cast-ops.mlir | 262 ++++ mlir/test/Dialect/SPIRV/IR/group-ops.mlir | 13 +- mlir/test/Dialect/SPIRV/IR/logical-ops.mlir | 211 +++ mlir/test/Dialect/SPIRV/IR/memory-ops.mlir | 629 ++++++++ mlir/test/Dialect/SPIRV/IR/misc-ops.mlir | 29 + mlir/test/Dialect/SPIRV/IR/ops.mlir | 1355 ----------------- mlir/test/Dialect/SPIRV/IR/structure-ops.mlir | 35 + .../SPIRV/{barrier.mlir => barrier-ops.mlir} | 0 15 files changed, 1908 insertions(+), 1847 deletions(-) create mode 100644 mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td create mode 100644 mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td create mode 100644 mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td create mode 100644 mlir/test/Dialect/SPIRV/IR/barrier-ops.mlir create mode 100644 mlir/test/Dialect/SPIRV/IR/cast-ops.mlir create mode 100644 mlir/test/Dialect/SPIRV/IR/memory-ops.mlir create mode 100644 mlir/test/Dialect/SPIRV/IR/misc-ops.mlir delete mode 100644 mlir/test/Dialect/SPIRV/IR/ops.mlir rename mlir/test/Target/SPIRV/{barrier.mlir => barrier-ops.mlir} (100%) diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td new file mode 100644 index 00000000000000..98a0c300e69f02 --- /dev/null +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td @@ -0,0 +1,141 @@ +//===-- SPIRVBarrierOps.td - MLIR SPIR-V Barrier Ops 
-------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains barrier ops for the SPIR-V dialect. It corresponds +// to "3.32.20. Barrrier Instructions" of the SPIR-V spec. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SPIRV_IR_BARRIER_OPS +#define MLIR_DIALECT_SPIRV_IR_BARRIER_OPS + +include "mlir/Dialect/SPIRV/IR/SPIRVBase.td" + +// ----- + +def SPV_ControlBarrierOp : SPV_Op<"ControlBarrier", []> { + let summary = [{ + Wait for other invocations of this module to reach the current point of + execution. + }]; + + let description = [{ + All invocations of this module within Execution scope must reach this + point of execution before any invocation will proceed beyond it. + + When Execution is Workgroup or larger, behavior is undefined if this + instruction is used in control flow that is non-uniform within + Execution. When Execution is Subgroup or Invocation, the behavior of + this instruction in non-uniform control flow is defined by the client + API. + + If Semantics is not None, this instruction also serves as an + OpMemoryBarrier instruction, and must also perform and adhere to the + description and semantics of an OpMemoryBarrier instruction with the + same Memory and Semantics operands. This allows atomically specifying + both a control barrier and a memory barrier (that is, without needing + two instructions). If Semantics is None, Memory is ignored. + + Before version 1.3, it is only valid to use this instruction with + TessellationControl, GLCompute, or Kernel execution models. There is no + such restriction starting with version 1.3. 
+ + When used with the TessellationControl execution model, it also + implicitly synchronizes the Output Storage Class: Writes to Output + variables performed by any invocation executed prior to a + OpControlBarrier will be visible to any other invocation after return + from that OpControlBarrier. + + + + ``` + scope ::= `"CrossDevice"` | `"Device"` | `"Workgroup"` | ... + + memory-semantics ::= `"None"` | `"Acquire"` | "Release"` | ... + + control-barrier-op ::= `spv.ControlBarrier` scope, scope, memory-semantics + ``` + + #### Example: + + ```mlir + spv.ControlBarrier "Workgroup", "Device", "Acquire|UniformMemory" + + ``` + }]; + + let arguments = (ins + SPV_ScopeAttr:$execution_scope, + SPV_ScopeAttr:$memory_scope, + SPV_MemorySemanticsAttr:$memory_semantics + ); + + let results = (outs); + + let verifier = [{ return verifyMemorySemantics(*this); }]; + + let autogenSerialization = 0; + + let assemblyFormat = [{ + $execution_scope `,` $memory_scope `,` $memory_semantics attr-dict + }]; +} + +// ----- + +def SPV_MemoryBarrierOp : SPV_Op<"MemoryBarrier", []> { + let summary = "Control the order that memory accesses are observed."; + + let description = [{ + Ensures that memory accesses issued before this instruction will be + observed before memory accesses issued after this instruction. This + control is ensured only for memory accesses issued by this invocation + and observed by another invocation executing within Memory scope. If the + Vulkan memory model is declared, this ordering only applies to memory + accesses that use the NonPrivatePointer memory operand or + NonPrivateTexel image operand. + + Semantics declares what kind of memory is being controlled and what kind + of control to apply. + + To execute both a memory barrier and a control barrier, see + OpControlBarrier. + + + + ``` + scope ::= `"CrossDevice"` | `"Device"` | `"Workgroup"` | ... + + memory-semantics ::= `"None"` | `"Acquire"` | `"Release"` | ... 
+ + memory-barrier-op ::= `spv.MemoryBarrier` scope, memory-semantics + ``` + + #### Example: + + ```mlir + spv.MemoryBarrier "Device", "Acquire|UniformMemory" + + ``` + }]; + + let arguments = (ins + SPV_ScopeAttr:$memory_scope, + SPV_MemorySemanticsAttr:$memory_semantics + ); + + let results = (outs); + + let verifier = [{ return verifyMemorySemantics(*this); }]; + + let autogenSerialization = 0; + + let assemblyFormat = "$memory_scope `,` $memory_semantics attr-dict"; +} + +#endif // MLIR_DIALECT_SPIRV_IR_BARRIER_OPS diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td new file mode 100644 index 00000000000000..ef296668cbc391 --- /dev/null +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td @@ -0,0 +1,296 @@ +//===-- SPIRVMemoryOps.td - MLIR SPIR-V Memory Ops ---------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains memory ops for the SPIR-V dialect. It corresponds +// to "3.32.8. Memory Instructions" of the SPIR-V spec. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SPIRV_IR_MEMORY_OPS +#define MLIR_DIALECT_SPIRV_IR_MEMORY_OPS + +include "mlir/Dialect/SPIRV/IR/SPIRVBase.td" + +// ----- + +def SPV_AccessChainOp : SPV_Op<"AccessChain", [NoSideEffect]> { + let summary = [{ + Create a pointer into a composite object that can be used with OpLoad + and OpStore. + }]; + + let description = [{ + Result Type must be an OpTypePointer. Its Type operand must be the type + reached by walking the Base’s type hierarchy down to the last provided + index in Indexes, and its Storage Class operand must be the same as the + Storage Class of Base. 
+ + Base must be a pointer, pointing to the base of a composite object. + + Indexes walk the type hierarchy to the desired depth, potentially down + to scalar granularity. The first index in Indexes will select the top- + level member/element/component/element of the base composite. All + composite constituents use zero-based numbering, as described by their + OpType… instruction. The second index will apply similarly to that + result, and so on. Once any non-composite type is reached, there must be + no remaining (unused) indexes. + + Each index in Indexes + + - must be a scalar integer type, + + - is treated as a signed count, and + + - must be an OpConstant when indexing into a structure. + + + ``` + access-chain-op ::= ssa-id `=` `spv.AccessChain` ssa-use + `[` ssa-use (',' ssa-use)* `]` + `:` pointer-type + ``` + + #### Example: + + ```mlir + %0 = "spv.constant"() { value = 1: i32} : () -> i32 + %1 = spv.Variable : !spv.ptr>, Function> + %2 = spv.AccessChain %1[%0] : !spv.ptr>, Function> + %3 = spv.Load "Function" %2 ["Volatile"] : !spv.array<4xf32> + ``` + }]; + + let arguments = (ins + SPV_AnyPtr:$base_ptr, + Variadic:$indices + ); + + let results = (outs + SPV_AnyPtr:$component_ptr + ); + + let builders = [OpBuilderDAG<(ins "Value":$basePtr, "ValueRange":$indices)>]; + + let hasCanonicalizer = 1; +} + +// ----- + +def SPV_CopyMemoryOp : SPV_Op<"CopyMemory", []> { + let summary = [{ + Copy from the memory pointed to by Source to the memory pointed to by + Target. Both operands must be non-void pointers and having the same + Type operand in their OpTypePointer type declaration. Matching Storage + Class is not required. The amount of memory copied is the size of the + type pointed to. The copied type must have a fixed size; i.e., it cannot + be, nor include, any OpTypeRuntimeArray types. + }]; + + let description = [{ + If present, any Memory Operands must begin with a memory operand + literal. 
If not present, it is the same as specifying the memory operand + None. Before version 1.4, at most one memory operands mask can be + provided. Starting with version 1.4 two masks can be provided, as + described in Memory Operands. If no masks or only one mask is present, + it applies to both Source and Target. If two masks are present, the + first applies to Target and cannot include MakePointerVisible, and the + second applies to Source and cannot include MakePointerAvailable. + + + + ``` + copy-memory-op ::= `spv.CopyMemory ` storage-class ssa-use + storage-class ssa-use + (`[` memory-access `]` (`, [` memory-access `]`)?)? + ` : ` spirv-element-type + ``` + + #### Example: + + ```mlir + %0 = spv.Variable : !spv.ptr + %1 = spv.Variable : !spv.ptr + spv.CopyMemory "Function" %0, "Function" %1 : f32 + ``` + }]; + + let arguments = (ins + SPV_AnyPtr:$target, + SPV_AnyPtr:$source, + OptionalAttr:$memory_access, + OptionalAttr:$alignment, + OptionalAttr:$source_memory_access, + OptionalAttr:$source_alignment + ); + + let results = (outs); + + let verifier = [{ return verifyCopyMemory(*this); }]; + + let autogenSerialization = 0; +} + +// ----- + +def SPV_LoadOp : SPV_Op<"Load", []> { + let summary = "Load through a pointer."; + + let description = [{ + Result Type is the type of the loaded object. It must be a type with + fixed size; i.e., it cannot be, nor include, any OpTypeRuntimeArray + types. + + Pointer is the pointer to load through. Its type must be an + OpTypePointer whose Type operand is the same as Result Type. + + If present, any Memory Operands must begin with a memory operand + literal. If not present, it is the same as specifying the memory operand + None. + + + + ``` + memory-access ::= `"None"` | `"Volatile"` | `"Aligned", ` integer-literal + | `"NonTemporal"` + + load-op ::= ssa-id ` = spv.Load ` storage-class ssa-use + (`[` memory-access `]`)? 
` : ` spirv-element-type + ``` + + #### Example: + + ```mlir + %0 = spv.Variable : !spv.ptr + %1 = spv.Load "Function" %0 : f32 + %2 = spv.Load "Function" %0 ["Volatile"] : f32 + %3 = spv.Load "Function" %0 ["Aligned", 4] : f32 + ``` + }]; + + let arguments = (ins + SPV_AnyPtr:$ptr, + OptionalAttr:$memory_access, + OptionalAttr:$alignment + ); + + let results = (outs + SPV_Type:$value + ); + + let builders = [ + OpBuilderDAG<(ins "Value":$basePtr, + CArg<"IntegerAttr", "{}">:$memory_access, + CArg<"IntegerAttr", "{}">:$alignment)> + ]; +} + +// ----- + +def SPV_StoreOp : SPV_Op<"Store", []> { + let summary = "Store through a pointer."; + + let description = [{ + Pointer is the pointer to store through. Its type must be an + OpTypePointer whose Type operand is the same as the type of Object. + + Object is the object to store. + + If present, any Memory Operands must begin with a memory operand + literal. If not present, it is the same as specifying the memory operand + None. + + + + ``` + store-op ::= `spv.Store ` storage-class ssa-use `, ` ssa-use `, ` + (`[` memory-access `]`)? `:` spirv-element-type + ``` + + #### Example: + + ```mlir + %0 = spv.Variable : !spv.ptr + %1 = spv.FMul ... : f32 + spv.Store "Function" %0, %1 : f32 + spv.Store "Function" %0, %1 ["Volatile"] : f32 + spv.Store "Function" %0, %1 ["Aligned", 4] : f32 + ``` + }]; + + let arguments = (ins + SPV_AnyPtr:$ptr, + SPV_Type:$value, + OptionalAttr:$memory_access, + OptionalAttr:$alignment + ); + + let results = (outs); + + let builders = [ + OpBuilderDAG<(ins "Value":$ptr, "Value":$value, + CArg<"ArrayRef", "{}">:$namedAttrs), + [{ + $_state.addOperands(ptr); + $_state.addOperands(value); + $_state.addAttributes(namedAttrs); + }]> + ]; +} + +// ----- + +def SPV_VariableOp : SPV_Op<"Variable", []> { + let summary = [{ + Allocate an object in memory, resulting in a pointer to it, which can be + used with OpLoad and OpStore. + }]; + + let description = [{ + Result Type must be an OpTypePointer. 
Its Type operand is the type of + object in memory. + + Storage Class is the Storage Class of the memory holding the object. + Since the op is used to model function-level variables, the storage class + must be the `Function` Storage Class. + + Initializer is optional. If Initializer is present, it will be the + initial value of the variable’s memory content. Initializer must be an + from a constant instruction or a global (module scope) OpVariable + instruction. Initializer must have the same type as the type pointed to + by Result Type. + + + + ``` + variable-op ::= ssa-id `=` `spv.Variable` (`init(` ssa-use `)`)? + attribute-dict? `:` spirv-pointer-type + ``` + + where `init` specifies initializer. + + #### Example: + + ```mlir + %0 = spv.constant ... + + %1 = spv.Variable : !spv.ptr + %2 = spv.Variable init(%0): !spv.ptr + ``` + }]; + + let arguments = (ins + SPV_StorageClassAttr:$storage_class, + Optional:$initializer + ); + + let results = (outs + SPV_AnyPtr:$pointer + ); +} + +#endif // MLIR_DIALECT_SPIRV_IR_MEMORY_OPS diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td new file mode 100644 index 00000000000000..7e227c18956469 --- /dev/null +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td @@ -0,0 +1,61 @@ +//===-- SPIRVBarrierOps.td - MLIR SPIR-V Barrier Ops -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains miscellaneous ops for the SPIR-V dialect. It corresponds +// to "3.32.1. Miscellaneous Instructions" of the SPIR-V spec. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SPIRV_IR_MISC_OPS +#define MLIR_DIALECT_SPIRV_IR_MISC_OPS + +include "mlir/Dialect/SPIRV/IR/SPIRVBase.td" + +// ----- + +def SPV_UndefOp : SPV_Op<"undef", []> { + let summary = "Make an intermediate object whose value is undefined."; + + let description = [{ + Result Type is the type of object to make. + + Each consumption of Result yields an arbitrary, possibly different + bit pattern or abstract value resulting in possibly different concrete, + abstract, or opaque values. + + + + ``` + undef-op ::= `spv.undef` `:` spirv-type + ``` + + #### Example: + + ```mlir + %0 = spv.undef : f32 + %1 = spv.undef : !spv.struct>> + ``` + }]; + + let arguments = (ins); + + let results = (outs + SPV_Type:$result + ); + + let verifier = [{ return success(); }]; + + let hasOpcode = 0; + let autogenSerialization = 0; + + let assemblyFormat = "attr-dict `:` type($result)"; +} + +// ----- + +#endif // MLIR_DIALECT_SPIRV_IR_MISC_OPS diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.td index 3621ff71f9c733..e5892012b98dd2 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.td @@ -24,6 +24,7 @@ include "mlir/Dialect/SPIRV/IR/SPIRVBase.td" include "mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVAtomicOps.td" +include "mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVBitOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVCastOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td" @@ -33,500 +34,11 @@ include "mlir/Dialect/SPIRV/IR/SPIRVGLSLOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVGroupOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVLogicalOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVMatrixOps.td" +include "mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td" +include "mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td" include 
"mlir/Dialect/SPIRV/IR/SPIRVNonUniformOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVOCLOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td" include "mlir/Interfaces/SideEffectInterfaces.td" -// ----- - -def SPV_AccessChainOp : SPV_Op<"AccessChain", [NoSideEffect]> { - let summary = [{ - Create a pointer into a composite object that can be used with OpLoad - and OpStore. - }]; - - let description = [{ - Result Type must be an OpTypePointer. Its Type operand must be the type - reached by walking the Base’s type hierarchy down to the last provided - index in Indexes, and its Storage Class operand must be the same as the - Storage Class of Base. - - Base must be a pointer, pointing to the base of a composite object. - - Indexes walk the type hierarchy to the desired depth, potentially down - to scalar granularity. The first index in Indexes will select the top- - level member/element/component/element of the base composite. All - composite constituents use zero-based numbering, as described by their - OpType… instruction. The second index will apply similarly to that - result, and so on. Once any non-composite type is reached, there must be - no remaining (unused) indexes. - - Each index in Indexes - - - must be a scalar integer type, - - - is treated as a signed count, and - - - must be an OpConstant when indexing into a structure. 
- - - ``` - access-chain-op ::= ssa-id `=` `spv.AccessChain` ssa-use - `[` ssa-use (',' ssa-use)* `]` - `:` pointer-type - ``` - - #### Example: - - ```mlir - %0 = "spv.constant"() { value = 1: i32} : () -> i32 - %1 = spv.Variable : !spv.ptr>, Function> - %2 = spv.AccessChain %1[%0] : !spv.ptr>, Function> - %3 = spv.Load "Function" %2 ["Volatile"] : !spv.array<4xf32> - ``` - }]; - - let arguments = (ins - SPV_AnyPtr:$base_ptr, - Variadic:$indices - ); - - let results = (outs - SPV_AnyPtr:$component_ptr - ); - - let builders = [OpBuilderDAG<(ins "Value":$basePtr, "ValueRange":$indices)>]; - - let hasCanonicalizer = 1; -} - -// ----- - -def SPV_ControlBarrierOp : SPV_Op<"ControlBarrier", []> { - let summary = [{ - Wait for other invocations of this module to reach the current point of - execution. - }]; - - let description = [{ - All invocations of this module within Execution scope must reach this - point of execution before any invocation will proceed beyond it. - - When Execution is Workgroup or larger, behavior is undefined if this - instruction is used in control flow that is non-uniform within - Execution. When Execution is Subgroup or Invocation, the behavior of - this instruction in non-uniform control flow is defined by the client - API. - - If Semantics is not None, this instruction also serves as an - OpMemoryBarrier instruction, and must also perform and adhere to the - description and semantics of an OpMemoryBarrier instruction with the - same Memory and Semantics operands. This allows atomically specifying - both a control barrier and a memory barrier (that is, without needing - two instructions). If Semantics is None, Memory is ignored. - - Before version 1.3, it is only valid to use this instruction with - TessellationControl, GLCompute, or Kernel execution models. There is no - such restriction starting with version 1.3. 
- - When used with the TessellationControl execution model, it also - implicitly synchronizes the Output Storage Class: Writes to Output - variables performed by any invocation executed prior to a - OpControlBarrier will be visible to any other invocation after return - from that OpControlBarrier. - - - - ``` - scope ::= `"CrossDevice"` | `"Device"` | `"Workgroup"` | ... - - memory-semantics ::= `"None"` | `"Acquire"` | "Release"` | ... - - control-barrier-op ::= `spv.ControlBarrier` scope, scope, memory-semantics - ``` - - #### Example: - - ```mlir - spv.ControlBarrier "Workgroup", "Device", "Acquire|UniformMemory" - - ``` - }]; - - let arguments = (ins - SPV_ScopeAttr:$execution_scope, - SPV_ScopeAttr:$memory_scope, - SPV_MemorySemanticsAttr:$memory_semantics - ); - - let results = (outs); - - let verifier = [{ return verifyMemorySemantics(*this); }]; - - let autogenSerialization = 0; - - let assemblyFormat = [{ - $execution_scope `,` $memory_scope `,` $memory_semantics attr-dict - }]; -} - -// ----- - -def SPV_CopyMemoryOp : SPV_Op<"CopyMemory", []> { - let summary = [{ - Copy from the memory pointed to by Source to the memory pointed to by - Target. Both operands must be non-void pointers and having the same - Type operand in their OpTypePointer type declaration. Matching Storage - Class is not required. The amount of memory copied is the size of the - type pointed to. The copied type must have a fixed size; i.e., it cannot - be, nor include, any OpTypeRuntimeArray types. - }]; - - let description = [{ - If present, any Memory Operands must begin with a memory operand - literal. If not present, it is the same as specifying the memory operand - None. Before version 1.4, at most one memory operands mask can be - provided. Starting with version 1.4 two masks can be provided, as - described in Memory Operands. If no masks or only one mask is present, - it applies to both Source and Target. 
If two masks are present, the - first applies to Target and cannot include MakePointerVisible, and the - second applies to Source and cannot include MakePointerAvailable. - - - - ``` - copy-memory-op ::= `spv.CopyMemory ` storage-class ssa-use - storage-class ssa-use - (`[` memory-access `]` (`, [` memory-access `]`)?)? - ` : ` spirv-element-type - ``` - - #### Example: - - ```mlir - %0 = spv.Variable : !spv.ptr - %1 = spv.Variable : !spv.ptr - spv.CopyMemory "Function" %0, "Function" %1 : f32 - ``` - }]; - - let arguments = (ins - SPV_AnyPtr:$target, - SPV_AnyPtr:$source, - OptionalAttr:$memory_access, - OptionalAttr:$alignment, - OptionalAttr:$source_memory_access, - OptionalAttr:$source_alignment - ); - - let results = (outs); - - let verifier = [{ return verifyCopyMemory(*this); }]; - - let autogenSerialization = 0; -} - -// ----- - -def SPV_ExecutionModeOp : SPV_Op<"ExecutionMode", [InModuleScope]> { - let summary = "Declare an execution mode for an entry point."; - - let description = [{ - Entry Point must be the Entry Point operand of an OpEntryPoint - instruction. - - Mode is the execution mode. See Execution Mode. - - This instruction is only valid when the Mode operand is an execution - mode that takes no Extra Operands, or takes Extra Operands that are not - operands. - - - - ``` - execution-mode ::= "Invocations" | "SpacingEqual" | - - - execution-mode-op ::= `spv.ExecutionMode ` ssa-use execution-mode - (integer-literal (`, ` integer-literal)* )? 
- ``` - - #### Example: - - ```mlir - spv.ExecutionMode @foo "ContractionOff" - spv.ExecutionMode @bar "LocalSizeHint", 3, 4, 5 - ``` - }]; - - let arguments = (ins - FlatSymbolRefAttr:$fn, - SPV_ExecutionModeAttr:$execution_mode, - I32ArrayAttr:$values - ); - - let results = (outs); - - let verifier = [{ return success(); }]; - - let autogenSerialization = 0; - - let builders = [ - OpBuilderDAG<(ins "spirv::FuncOp":$function, - "spirv::ExecutionMode":$executionMode, "ArrayRef":$params)>]; -} - -// ----- - -def SPV_LoadOp : SPV_Op<"Load", []> { - let summary = "Load through a pointer."; - - let description = [{ - Result Type is the type of the loaded object. It must be a type with - fixed size; i.e., it cannot be, nor include, any OpTypeRuntimeArray - types. - - Pointer is the pointer to load through. Its type must be an - OpTypePointer whose Type operand is the same as Result Type. - - If present, any Memory Operands must begin with a memory operand - literal. If not present, it is the same as specifying the memory operand - None. - - - - ``` - memory-access ::= `"None"` | `"Volatile"` | `"Aligned", ` integer-literal - | `"NonTemporal"` - - load-op ::= ssa-id ` = spv.Load ` storage-class ssa-use - (`[` memory-access `]`)? 
` : ` spirv-element-type - ``` - - #### Example: - - ```mlir - %0 = spv.Variable : !spv.ptr - %1 = spv.Load "Function" %0 : f32 - %2 = spv.Load "Function" %0 ["Volatile"] : f32 - %3 = spv.Load "Function" %0 ["Aligned", 4] : f32 - ``` - }]; - - let arguments = (ins - SPV_AnyPtr:$ptr, - OptionalAttr:$memory_access, - OptionalAttr:$alignment - ); - - let results = (outs - SPV_Type:$value - ); - - let builders = [ - OpBuilderDAG<(ins "Value":$basePtr, - CArg<"IntegerAttr", "{}">:$memory_access, - CArg<"IntegerAttr", "{}">:$alignment)> - ]; -} - -// ----- - -def SPV_MemoryBarrierOp : SPV_Op<"MemoryBarrier", []> { - let summary = "Control the order that memory accesses are observed."; - - let description = [{ - Ensures that memory accesses issued before this instruction will be - observed before memory accesses issued after this instruction. This - control is ensured only for memory accesses issued by this invocation - and observed by another invocation executing within Memory scope. If the - Vulkan memory model is declared, this ordering only applies to memory - accesses that use the NonPrivatePointer memory operand or - NonPrivateTexel image operand. - - Semantics declares what kind of memory is being controlled and what kind - of control to apply. - - To execute both a memory barrier and a control barrier, see - OpControlBarrier. - - - - ``` - scope ::= `"CrossDevice"` | `"Device"` | `"Workgroup"` | ... - - memory-semantics ::= `"None"` | `"Acquire"` | `"Release"` | ... 
- - memory-barrier-op ::= `spv.MemoryBarrier` scope, memory-semantics - ``` - - #### Example: - - ```mlir - spv.MemoryBarrier "Device", "Acquire|UniformMemory" - - ``` - }]; - - let arguments = (ins - SPV_ScopeAttr:$memory_scope, - SPV_MemorySemanticsAttr:$memory_semantics - ); - - let results = (outs); - - let verifier = [{ return verifyMemorySemantics(*this); }]; - - let autogenSerialization = 0; - - let assemblyFormat = "$memory_scope `,` $memory_semantics attr-dict"; -} - -// ----- - -def SPV_StoreOp : SPV_Op<"Store", []> { - let summary = "Store through a pointer."; - - let description = [{ - Pointer is the pointer to store through. Its type must be an - OpTypePointer whose Type operand is the same as the type of Object. - - Object is the object to store. - - If present, any Memory Operands must begin with a memory operand - literal. If not present, it is the same as specifying the memory operand - None. - - - - ``` - store-op ::= `spv.Store ` storage-class ssa-use `, ` ssa-use `, ` - (`[` memory-access `]`)? `:` spirv-element-type - ``` - - #### Example: - - ```mlir - %0 = spv.Variable : !spv.ptr - %1 = spv.FMul ... : f32 - spv.Store "Function" %0, %1 : f32 - spv.Store "Function" %0, %1 ["Volatile"] : f32 - spv.Store "Function" %0, %1 ["Aligned", 4] : f32 - ``` - }]; - - let arguments = (ins - SPV_AnyPtr:$ptr, - SPV_Type:$value, - OptionalAttr:$memory_access, - OptionalAttr:$alignment - ); - - let results = (outs); - - let builders = [ - OpBuilderDAG<(ins "Value":$ptr, "Value":$value, - CArg<"ArrayRef", "{}">:$namedAttrs), - [{ - $_state.addOperands(ptr); - $_state.addOperands(value); - $_state.addAttributes(namedAttrs); - }]> - ]; -} - -// ----- - -def SPV_UndefOp : SPV_Op<"undef", []> { - let summary = "Make an intermediate object whose value is undefined."; - - let description = [{ - Result Type is the type of object to make. 
- - Each consumption of Result yields an arbitrary, possibly different - bit pattern or abstract value resulting in possibly different concrete, - abstract, or opaque values. - - - - ``` - undef-op ::= `spv.undef` `:` spirv-type - ``` - - #### Example: - - ```mlir - %0 = spv.undef : f32 - %1 = spv.undef : !spv.struct>> - ``` - }]; - - let arguments = (ins); - - let results = (outs - SPV_Type:$result - ); - - let verifier = [{ return success(); }]; - - let hasOpcode = 0; - let autogenSerialization = 0; - - let assemblyFormat = "attr-dict `:` type($result)"; -} - -// ----- - -def SPV_VariableOp : SPV_Op<"Variable", []> { - let summary = [{ - Allocate an object in memory, resulting in a pointer to it, which can be - used with OpLoad and OpStore. - }]; - - let description = [{ - Result Type must be an OpTypePointer. Its Type operand is the type of - object in memory. - - Storage Class is the Storage Class of the memory holding the object. - Since the op is used to model function-level variables, the storage class - must be the `Function` Storage Class. - - Initializer is optional. If Initializer is present, it will be the - initial value of the variable’s memory content. Initializer must be an - from a constant instruction or a global (module scope) OpVariable - instruction. Initializer must have the same type as the type pointed to - by Result Type. - - - - ``` - variable-op ::= ssa-id `=` `spv.Variable` (`init(` ssa-use `)`)? - attribute-dict? `:` spirv-pointer-type - ``` - - where `init` specifies initializer. - - #### Example: - - ```mlir - %0 = spv.constant ... 
- - %1 = spv.Variable : !spv.ptr - %2 = spv.Variable init(%0): !spv.ptr - ``` - }]; - - let arguments = (ins - SPV_StorageClassAttr:$storage_class, - Optional:$initializer - ); - - let results = (outs - SPV_AnyPtr:$pointer - ); -} - -// ----- - #endif // MLIR_DIALECT_SPIRV_IR_OPS diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td index 88183f98c6d1d9..d4bf2861c960b6 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td @@ -195,6 +195,56 @@ def SPV_EntryPointOp : SPV_Op<"EntryPoint", [InModuleScope]> { // ----- +def SPV_ExecutionModeOp : SPV_Op<"ExecutionMode", [InModuleScope]> { + let summary = "Declare an execution mode for an entry point."; + + let description = [{ + Entry Point must be the Entry Point operand of an OpEntryPoint + instruction. + + Mode is the execution mode. See Execution Mode. + + This instruction is only valid when the Mode operand is an execution + mode that takes no Extra Operands, or takes Extra Operands that are not + operands. + + + + ``` + execution-mode ::= "Invocations" | "SpacingEqual" | + + + execution-mode-op ::= `spv.ExecutionMode ` ssa-use execution-mode + (integer-literal (`, ` integer-literal)* )? 
+ ``` + + #### Example: + + ```mlir + spv.ExecutionMode @foo "ContractionOff" + spv.ExecutionMode @bar "LocalSizeHint", 3, 4, 5 + ``` + }]; + + let arguments = (ins + FlatSymbolRefAttr:$fn, + SPV_ExecutionModeAttr:$execution_mode, + I32ArrayAttr:$values + ); + + let results = (outs); + + let verifier = [{ return success(); }]; + + let autogenSerialization = 0; + + let builders = [ + OpBuilderDAG<(ins "spirv::FuncOp":$function, + "spirv::ExecutionMode":$executionMode, "ArrayRef":$params)>]; +} + +// ----- + def SPV_FuncOp : SPV_Op<"func", [ AutomaticAllocationScope, DeclareOpInterfaceMethods, FunctionLike, InModuleScope, IsolatedFromAbove, Symbol diff --git a/mlir/test/Dialect/SPIRV/IR/barrier-ops.mlir b/mlir/test/Dialect/SPIRV/IR/barrier-ops.mlir new file mode 100644 index 00000000000000..bd16d29add81da --- /dev/null +++ b/mlir/test/Dialect/SPIRV/IR/barrier-ops.mlir @@ -0,0 +1,49 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s + +//===----------------------------------------------------------------------===// +// spv.ControlBarrier +//===----------------------------------------------------------------------===// + +func @control_barrier_0() -> () { + // CHECK: spv.ControlBarrier "Workgroup", "Device", "Acquire|UniformMemory" + spv.ControlBarrier "Workgroup", "Device", "Acquire|UniformMemory" + return +} + +// ----- + +func @control_barrier_1() -> () { + // expected-error @+1 {{invalid execution_scope attribute specification: "Something"}} + spv.ControlBarrier "Something", "Device", "Acquire|UniformMemory" + return +} + + +// ----- + +//===----------------------------------------------------------------------===// +// spv.MemoryBarrier +//===----------------------------------------------------------------------===// + +func @memory_barrier_0() -> () { + // CHECK: spv.MemoryBarrier "Device", "Acquire|UniformMemory" + spv.MemoryBarrier "Device", "Acquire|UniformMemory" + return +} + +// ----- + +func @memory_barrier_1() -> () { + // CHECK: 
spv.MemoryBarrier "Workgroup", "Acquire" + spv.MemoryBarrier "Workgroup", "Acquire" + return +} + +// ----- + +func @memory_barrier_2() -> () { + // expected-error @+1 {{expected at most one of these four memory constraints to be set: `Acquire`, `Release`,`AcquireRelease` or `SequentiallyConsistent`}} + spv.MemoryBarrier "Device", "Acquire|Release" + return +} + diff --git a/mlir/test/Dialect/SPIRV/IR/bit-ops.mlir b/mlir/test/Dialect/SPIRV/IR/bit-ops.mlir index 66998ec7084e58..b5681e35d192d7 100644 --- a/mlir/test/Dialect/SPIRV/IR/bit-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/bit-ops.mlir @@ -1,5 +1,79 @@ // RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s +//===----------------------------------------------------------------------===// +// spv.BitCount +//===----------------------------------------------------------------------===// + +func @bitcount(%arg: i32) -> i32 { + // CHECK: spv.BitCount {{%.*}} : i32 + %0 = spv.BitCount %arg : i32 + spv.ReturnValue %0 : i32 +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.BitFieldInsert +//===----------------------------------------------------------------------===// + +func @bit_field_insert_vec(%base: vector<3xi32>, %insert: vector<3xi32>, %offset: i32, %count: i16) -> vector<3xi32> { + // CHECK: {{%.*}} = spv.BitFieldInsert {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}} : vector<3xi32>, i32, i16 + %0 = spv.BitFieldInsert %base, %insert, %offset, %count : vector<3xi32>, i32, i16 + spv.ReturnValue %0 : vector<3xi32> +} + +// ----- + +func @bit_field_insert_invalid_insert_type(%base: vector<3xi32>, %insert: vector<2xi32>, %offset: i32, %count: i16) -> vector<3xi32> { + // expected-error @+1 {{all of {base, insert, result} have same type}} + %0 = "spv.BitFieldInsert" (%base, %insert, %offset, %count) : (vector<3xi32>, vector<2xi32>, i32, i16) -> vector<3xi32> + spv.ReturnValue %0 : vector<3xi32> +} + +// ----- + 
+//===----------------------------------------------------------------------===// +// spv.BitFieldSExtract +//===----------------------------------------------------------------------===// + +func @bit_field_s_extract_vec(%base: vector<3xi32>, %offset: i8, %count: i8) -> vector<3xi32> { + // CHECK: {{%.*}} = spv.BitFieldSExtract {{%.*}}, {{%.*}}, {{%.*}} : vector<3xi32>, i8, i8 + %0 = spv.BitFieldSExtract %base, %offset, %count : vector<3xi32>, i8, i8 + spv.ReturnValue %0 : vector<3xi32> +} + +//===----------------------------------------------------------------------===// +// spv.BitFieldUExtract +//===----------------------------------------------------------------------===// + +func @bit_field_u_extract_vec(%base: vector<3xi32>, %offset: i8, %count: i8) -> vector<3xi32> { + // CHECK: {{%.*}} = spv.BitFieldUExtract {{%.*}}, {{%.*}}, {{%.*}} : vector<3xi32>, i8, i8 + %0 = spv.BitFieldUExtract %base, %offset, %count : vector<3xi32>, i8, i8 + spv.ReturnValue %0 : vector<3xi32> +} + +// ----- + +func @bit_field_u_extract_invalid_result_type(%base: vector<3xi32>, %offset: i32, %count: i16) -> vector<4xi32> { + // expected-error @+1 {{failed to verify that all of {base, result} have same type}} + %0 = "spv.BitFieldUExtract" (%base, %offset, %count) : (vector<3xi32>, i32, i16) -> vector<4xi32> + spv.ReturnValue %0 : vector<4xi32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.BitReverse +//===----------------------------------------------------------------------===// + +func @bitreverse(%arg: i32) -> i32 { + // CHECK: spv.BitReverse {{%.*}} : i32 + %0 = spv.BitReverse %arg : i32 + spv.ReturnValue %0 : i32 +} + +// ----- + //===----------------------------------------------------------------------===// // spv.BitwiseOr //===----------------------------------------------------------------------===// @@ -75,3 +149,59 @@ func @bitwise_and_float(%arg0: f16, %arg1: f16) -> f16 { %0 = spv.BitwiseAnd %arg0, %arg1 : f16 
return %0 : f16 } + +// ----- + +//===----------------------------------------------------------------------===// +// spv.Not +//===----------------------------------------------------------------------===// + +func @not(%arg: i32) -> i32 { + // CHECK: spv.Not {{%.*}} : i32 + %0 = spv.Not %arg : i32 + spv.ReturnValue %0 : i32 +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.ShiftLeftLogical +//===----------------------------------------------------------------------===// + +func @shift_left_logical(%arg0: i32, %arg1 : i16) -> i32 { + // CHECK: {{%.*}} = spv.ShiftLeftLogical {{%.*}}, {{%.*}} : i32, i16 + %0 = spv.ShiftLeftLogical %arg0, %arg1: i32, i16 + spv.ReturnValue %0 : i32 +} + +// ----- + +func @shift_left_logical_invalid_result_type(%arg0: i32, %arg1 : i16) -> i16 { + // expected-error @+1 {{expected the same type for the first operand and result, but provided 'i32' and 'i16'}} + %0 = "spv.ShiftLeftLogical" (%arg0, %arg1) : (i32, i16) -> (i16) + spv.ReturnValue %0 : i16 +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.ShiftRightArithmetic +//===----------------------------------------------------------------------===// + +func @shift_right_arithmetic(%arg0: vector<4xi32>, %arg1 : vector<4xi8>) -> vector<4xi32> { + // CHECK: {{%.*}} = spv.ShiftRightArithmetic {{%.*}}, {{%.*}} : vector<4xi32>, vector<4xi8> + %0 = spv.ShiftRightArithmetic %arg0, %arg1: vector<4xi32>, vector<4xi8> + spv.ReturnValue %0 : vector<4xi32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.ShiftRightLogical +//===----------------------------------------------------------------------===// + +func @shift_right_logical(%arg0: vector<2xi32>, %arg1 : vector<2xi8>) -> vector<2xi32> { + // CHECK: {{%.*}} = spv.ShiftRightLogical {{%.*}}, {{%.*}} : vector<2xi32>, vector<2xi8> + %0 = spv.ShiftRightLogical %arg0, %arg1: 
vector<2xi32>, vector<2xi8> + spv.ReturnValue %0 : vector<2xi32> +} diff --git a/mlir/test/Dialect/SPIRV/IR/cast-ops.mlir b/mlir/test/Dialect/SPIRV/IR/cast-ops.mlir new file mode 100644 index 00000000000000..c8c8edf258a9ac --- /dev/null +++ b/mlir/test/Dialect/SPIRV/IR/cast-ops.mlir @@ -0,0 +1,262 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s + +//===----------------------------------------------------------------------===// +// spv.Bitcast +//===----------------------------------------------------------------------===// + +func @cast1(%arg0 : f32) { + // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : f32 to i32 + %0 = spv.Bitcast %arg0 : f32 to i32 + return +} + +func @cast2(%arg0 : vector<2xf32>) { + // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : vector<2xf32> to vector<2xi32> + %0 = spv.Bitcast %arg0 : vector<2xf32> to vector<2xi32> + return +} + +func @cast3(%arg0 : vector<2xf32>) { + // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : vector<2xf32> to i64 + %0 = spv.Bitcast %arg0 : vector<2xf32> to i64 + return +} + +func @cast4(%arg0 : !spv.ptr) { + // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : !spv.ptr to !spv.ptr + %0 = spv.Bitcast %arg0 : !spv.ptr to !spv.ptr + return +} + +func @cast5(%arg0 : !spv.ptr) { + // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : !spv.ptr to !spv.ptr, Function> + %0 = spv.Bitcast %arg0 : !spv.ptr to !spv.ptr, Function> + return +} + +func @cast6(%arg0 : vector<4xf32>) { + // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : vector<4xf32> to vector<2xi64> + %0 = spv.Bitcast %arg0 : vector<4xf32> to vector<2xi64> + return +} + +// ----- + +func @cast1(%arg0 : f32) { + // expected-error @+1 {{result type must be different from operand type}} + %0 = spv.Bitcast %arg0 : f32 to f32 + return +} + +// ----- + +func @cast1(%arg0 : f32) { + // expected-error @+1 {{mismatch in result type bitwidth 64 and operand type bitwidth 32}} + %0 = spv.Bitcast %arg0 : f32 to i64 + return +} + +// ----- + +func @cast1(%arg0 : vector<2xf32>) { + // expected-error @+1 
{{mismatch in result type bitwidth 96 and operand type bitwidth 64}} + %0 = spv.Bitcast %arg0 : vector<2xf32> to vector<3xf32> + return +} + +// ----- + +func @cast3(%arg0 : !spv.ptr) { + // expected-error @+1 {{unhandled bit cast conversion from pointer type to non-pointer type}} + %0 = spv.Bitcast %arg0 : !spv.ptr to i64 + return +} + +// ----- + +func @cast3(%arg0 : i64) { + // expected-error @+1 {{unhandled bit cast conversion from non-pointer type to pointer type}} + %0 = spv.Bitcast %arg0 : i64 to !spv.ptr + return +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.ConvertFToS +//===----------------------------------------------------------------------===// + +func @convert_f_to_s_scalar(%arg0 : f32) -> i32 { + // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : f32 to i32 + %0 = spv.ConvertFToS %arg0 : f32 to i32 + spv.ReturnValue %0 : i32 +} + +// ----- + +func @convert_f64_to_s32_scalar(%arg0 : f64) -> i32 { + // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : f64 to i32 + %0 = spv.ConvertFToS %arg0 : f64 to i32 + spv.ReturnValue %0 : i32 +} + +// ----- + +func @convert_f_to_s_vector(%arg0 : vector<3xf32>) -> vector<3xi32> { + // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : vector<3xf32> to vector<3xi32> + %0 = spv.ConvertFToS %arg0 : vector<3xf32> to vector<3xi32> + spv.ReturnValue %0 : vector<3xi32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.ConvertFToU +//===----------------------------------------------------------------------===// + +func @convert_f_to_u_scalar(%arg0 : f32) -> i32 { + // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : f32 to i32 + %0 = spv.ConvertFToU %arg0 : f32 to i32 + spv.ReturnValue %0 : i32 +} + +// ----- + +func @convert_f64_to_u32_scalar(%arg0 : f64) -> i32 { + // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : f64 to i32 + %0 = spv.ConvertFToU %arg0 : f64 to i32 + spv.ReturnValue %0 : i32 +} + +// ----- + +func 
@convert_f_to_u_vector(%arg0 : vector<3xf32>) -> vector<3xi32> { + // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : vector<3xf32> to vector<3xi32> + %0 = spv.ConvertFToU %arg0 : vector<3xf32> to vector<3xi32> + spv.ReturnValue %0 : vector<3xi32> +} + +// ----- + +func @convert_f_to_u_coopmatrix(%arg0 : !spv.coopmatrix<8x16xf32, Subgroup>) { + // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : !spv.coopmatrix<8x16xf32, Subgroup> to !spv.coopmatrix<8x16xi32, Subgroup> + %0 = spv.ConvertFToU %arg0 : !spv.coopmatrix<8x16xf32, Subgroup> to !spv.coopmatrix<8x16xi32, Subgroup> + spv.Return +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.ConvertSToF +//===----------------------------------------------------------------------===// + +func @convert_s_to_f_scalar(%arg0 : i32) -> f32 { + // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : i32 to f32 + %0 = spv.ConvertSToF %arg0 : i32 to f32 + spv.ReturnValue %0 : f32 +} + +// ----- + +func @convert_s64_to_f32_scalar(%arg0 : i64) -> f32 { + // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : i64 to f32 + %0 = spv.ConvertSToF %arg0 : i64 to f32 + spv.ReturnValue %0 : f32 +} + +// ----- + +func @convert_s_to_f_vector(%arg0 : vector<3xi32>) -> vector<3xf32> { + // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : vector<3xi32> to vector<3xf32> + %0 = spv.ConvertSToF %arg0 : vector<3xi32> to vector<3xf32> + spv.ReturnValue %0 : vector<3xf32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.ConvertUToF +//===----------------------------------------------------------------------===// + +func @convert_u_to_f_scalar(%arg0 : i32) -> f32 { + // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : i32 to f32 + %0 = spv.ConvertUToF %arg0 : i32 to f32 + spv.ReturnValue %0 : f32 +} + +// ----- + +func @convert_u64_to_f32_scalar(%arg0 : i64) -> f32 { + // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : i64 to f32 + %0 = spv.ConvertUToF %arg0 : i64 to f32 + 
spv.ReturnValue %0 : f32 +} + +// ----- + +func @convert_u_to_f_vector(%arg0 : vector<3xi32>) -> vector<3xf32> { + // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : vector<3xi32> to vector<3xf32> + %0 = spv.ConvertUToF %arg0 : vector<3xi32> to vector<3xf32> + spv.ReturnValue %0 : vector<3xf32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.FConvert +//===----------------------------------------------------------------------===// + +func @f_convert_scalar(%arg0 : f32) -> f64 { + // CHECK: {{%.*}} = spv.FConvert {{%.*}} : f32 to f64 + %0 = spv.FConvert %arg0 : f32 to f64 + spv.ReturnValue %0 : f64 +} + +// ----- + +func @f_convert_vector(%arg0 : vector<3xf32>) -> vector<3xf64> { + // CHECK: {{%.*}} = spv.FConvert {{%.*}} : vector<3xf32> to vector<3xf64> + %0 = spv.FConvert %arg0 : vector<3xf32> to vector<3xf64> + spv.ReturnValue %0 : vector<3xf64> +} + +// ----- + +func @f_convert_coop_matrix(%arg0 : !spv.coopmatrix<8x16xf32, Subgroup>) { + // CHECK: {{%.*}} = spv.FConvert {{%.*}} : !spv.coopmatrix<8x16xf32, Subgroup> to !spv.coopmatrix<8x16xf64, Subgroup> + %0 = spv.FConvert %arg0 : !spv.coopmatrix<8x16xf32, Subgroup> to !spv.coopmatrix<8x16xf64, Subgroup> + spv.Return +} + +// ----- + +func @f_convert_vector(%arg0 : f32) -> f32 { + // expected-error @+1 {{expected the different bit widths for operand type and result type, but provided 'f32' and 'f32'}} + %0 = spv.FConvert %arg0 : f32 to f32 + spv.ReturnValue %0 : f32 +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.SConvert +//===----------------------------------------------------------------------===// + +func @s_convert_scalar(%arg0 : i32) -> i64 { + // CHECK: {{%.*}} = spv.SConvert {{%.*}} : i32 to i64 + %0 = spv.SConvert %arg0 : i32 to i64 + spv.ReturnValue %0 : i64 +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.UConvert 
+//===----------------------------------------------------------------------===// + +func @u_convert_scalar(%arg0 : i32) -> i64 { + // CHECK: {{%.*}} = spv.UConvert {{%.*}} : i32 to i64 + %0 = spv.UConvert %arg0 : i32 to i64 + spv.ReturnValue %0 : i64 +} + diff --git a/mlir/test/Dialect/SPIRV/IR/group-ops.mlir b/mlir/test/Dialect/SPIRV/IR/group-ops.mlir index 55a07270a348f5..c71cc3602b2e8d 100644 --- a/mlir/test/Dialect/SPIRV/IR/group-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/group-ops.mlir @@ -64,6 +64,17 @@ func @group_broadcast_negative_locid_vec4(%value: f32, %localid: vector<4xi32> ) // ----- +//===----------------------------------------------------------------------===// +// spv.SubgroupBallotKHR +//===----------------------------------------------------------------------===// + +func @subgroup_ballot(%predicate: i1) -> vector<4xi32> { + %0 = spv.SubgroupBallotKHR %predicate: vector<4xi32> + return %0: vector<4xi32> +} + +// ----- + //===----------------------------------------------------------------------===// // spv.SubgroupBlockReadINTEL //===----------------------------------------------------------------------===// @@ -100,4 +111,4 @@ func @subgroup_block_write_intel_vector(%ptr : !spv.ptr, %va // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : vector<3xi32> spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : vector<3xi32> return -} \ No newline at end of file +} diff --git a/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir b/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir index d102ae98d3aeca..baf8b45d7eafe8 100644 --- a/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir @@ -32,6 +32,217 @@ func @inotequal_vector(%arg0: vector<4xi32>, %arg1: vector<4xi32>) -> vector<4xi // ----- +//===----------------------------------------------------------------------===// +// spv.LogicalAnd +//===----------------------------------------------------------------------===// + +func @logicalBinary(%arg0 : i1, %arg1 : i1, 
%arg2 : i1) +{ + // CHECK: [[TMP:%.*]] = spv.LogicalAnd {{%.*}}, {{%.*}} : i1 + %0 = spv.LogicalAnd %arg0, %arg1 : i1 + // CHECK: {{%.*}} = spv.LogicalAnd [[TMP]], {{%.*}} : i1 + %1 = spv.LogicalAnd %0, %arg2 : i1 + return +} + +func @logicalBinary2(%arg0 : vector<4xi1>, %arg1 : vector<4xi1>) +{ + // CHECK: {{%.*}} = spv.LogicalAnd {{%.*}}, {{%.*}} : vector<4xi1> + %0 = spv.LogicalAnd %arg0, %arg1 : vector<4xi1> + return +} + +// ----- + +func @logicalBinary(%arg0 : i1, %arg1 : i1) +{ + // expected-error @+2 {{expected ':'}} + %0 = spv.LogicalAnd %arg0, %arg1 + return +} + +// ----- + +func @logicalBinary(%arg0 : i1, %arg1 : i1) +{ + // expected-error @+2 {{expected non-function type}} + %0 = spv.LogicalAnd %arg0, %arg1 : + return +} + +// ----- + +func @logicalBinary(%arg0 : i1, %arg1 : i1) +{ + // expected-error @+1 {{custom op 'spv.LogicalAnd' expected 2 operands}} + %0 = spv.LogicalAnd %arg0 : i1 + return +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.LogicalNot +//===----------------------------------------------------------------------===// + +func @logicalUnary(%arg0 : i1, %arg1 : i1) +{ + // CHECK: [[TMP:%.*]] = spv.LogicalNot {{%.*}} : i1 + %0 = spv.LogicalNot %arg0 : i1 + // CHECK: {{%.*}} = spv.LogicalNot [[TMP]] : i1 + %1 = spv.LogicalNot %0 : i1 + return +} + +func @logicalUnary2(%arg0 : vector<4xi1>) +{ + // CHECK: {{%.*}} = spv.LogicalNot {{%.*}} : vector<4xi1> + %0 = spv.LogicalNot %arg0 : vector<4xi1> + return +} + +// ----- + +func @logicalUnary(%arg0 : i1) +{ + // expected-error @+2 {{expected ':'}} + %0 = spv.LogicalNot %arg0 + return +} + +// ----- + +func @logicalUnary(%arg0 : i1) +{ + // expected-error @+2 {{expected non-function type}} + %0 = spv.LogicalNot %arg0 : + return +} + +// ----- + +func @logicalUnary(%arg0 : i1) +{ + // expected-error @+1 {{expected SSA operand}} + %0 = spv.LogicalNot : i1 + return +} + +// ----- + +func @logicalUnary(%arg0 : i32) +{ + // expected-error 
@+1 {{operand #0 must be bool or vector of bool values of length 2/3/4/8/16, but got 'i32'}} + %0 = spv.LogicalNot %arg0 : i32 + return +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.SelectOp +//===----------------------------------------------------------------------===// + +func @select_op_bool(%arg0: i1) -> () { + %0 = spv.constant true + %1 = spv.constant false + // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, i1 + %2 = spv.Select %arg0, %0, %1 : i1, i1 + return +} + +func @select_op_int(%arg0: i1) -> () { + %0 = spv.constant 2 : i32 + %1 = spv.constant 3 : i32 + // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, i32 + %2 = spv.Select %arg0, %0, %1 : i1, i32 + return +} + +func @select_op_float(%arg0: i1) -> () { + %0 = spv.constant 2.0 : f32 + %1 = spv.constant 3.0 : f32 + // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, f32 + %2 = spv.Select %arg0, %0, %1 : i1, f32 + return +} + +func @select_op_ptr(%arg0: i1) -> () { + %0 = spv.Variable : !spv.ptr + %1 = spv.Variable : !spv.ptr + // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, !spv.ptr + %2 = spv.Select %arg0, %0, %1 : i1, !spv.ptr + return +} + +func @select_op_vec(%arg0: i1) -> () { + %0 = spv.constant dense<[2.0, 3.0, 4.0]> : vector<3xf32> + %1 = spv.constant dense<[5.0, 6.0, 7.0]> : vector<3xf32> + // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, vector<3xf32> + %2 = spv.Select %arg0, %0, %1 : i1, vector<3xf32> + return +} + +func @select_op_vec_condn_vec(%arg0: vector<3xi1>) -> () { + %0 = spv.constant dense<[2.0, 3.0, 4.0]> : vector<3xf32> + %1 = spv.constant dense<[5.0, 6.0, 7.0]> : vector<3xf32> + // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : vector<3xi1>, vector<3xf32> + %2 = spv.Select %arg0, %0, %1 : vector<3xi1>, vector<3xf32> + return +} + +// ----- + +func @select_op(%arg0: i1) -> () { + %0 = spv.constant 2 : i32 + %1 = spv.constant 3 : i32 + // expected-error @+2 {{expected ','}} + %2 = spv.Select %arg0, 
%0, %1 : i1 + return +} + +// ----- + +func @select_op(%arg1: vector<3xi1>) -> () { + %0 = spv.constant 2 : i32 + %1 = spv.constant 3 : i32 + // expected-error @+1 {{result expected to be of vector type when condition is of vector type}} + %2 = spv.Select %arg1, %0, %1 : vector<3xi1>, i32 + return +} + +// ----- + +func @select_op(%arg1: vector<4xi1>) -> () { + %0 = spv.constant dense<[2, 3, 4]> : vector<3xi32> + %1 = spv.constant dense<[5, 6, 7]> : vector<3xi32> + // expected-error @+1 {{result should have the same number of elements as the condition when condition is of vector type}} + %2 = spv.Select %arg1, %0, %1 : vector<4xi1>, vector<3xi32> + return +} + +// ----- + +func @select_op(%arg1: vector<4xi1>) -> () { + %0 = spv.constant dense<[2.0, 3.0, 4.0]> : vector<3xf32> + %1 = spv.constant dense<[5, 6, 7]> : vector<3xi32> + // expected-error @+1 {{all of {true_value, false_value, result} have same type}} + %2 = "spv.Select"(%arg1, %0, %1) : (vector<4xi1>, vector<3xf32>, vector<3xi32>) -> vector<3xi32> + return +} + +// ----- + +func @select_op(%arg1: vector<4xi1>) -> () { + %0 = spv.constant dense<[2.0, 3.0, 4.0]> : vector<3xf32> + %1 = spv.constant dense<[5, 6, 7]> : vector<3xi32> + // expected-error @+1 {{all of {true_value, false_value, result} have same type}} + %2 = "spv.Select"(%arg1, %1, %0) : (vector<4xi1>, vector<3xi32>, vector<3xf32>) -> vector<3xi32> + return +} + +// ----- + //===----------------------------------------------------------------------===// // spv.SGreaterThan //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir new file mode 100644 index 00000000000000..d6f50a6be6f5c0 --- /dev/null +++ b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir @@ -0,0 +1,629 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s + +//===----------------------------------------------------------------------===// +// 
spv.AccessChain +//===----------------------------------------------------------------------===// + +func @access_chain_struct() -> () { + %0 = spv.constant 1: i32 + %1 = spv.Variable : !spv.ptr)>, Function> + // CHECK: spv.AccessChain {{.*}}[{{.*}}, {{.*}}] : !spv.ptr)>, Function> + %2 = spv.AccessChain %1[%0, %0] : !spv.ptr)>, Function>, i32, i32 + return +} + +func @access_chain_1D_array(%arg0 : i32) -> () { + %0 = spv.Variable : !spv.ptr, Function> + // CHECK: spv.AccessChain {{.*}}[{{.*}}] : !spv.ptr, Function> + %1 = spv.AccessChain %0[%arg0] : !spv.ptr, Function>, i32 + return +} + +func @access_chain_2D_array_1(%arg0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + // CHECK: spv.AccessChain {{.*}}[{{.*}}, {{.*}}] : !spv.ptr>, Function> + %1 = spv.AccessChain %0[%arg0, %arg0] : !spv.ptr>, Function>, i32, i32 + %2 = spv.Load "Function" %1 ["Volatile"] : f32 + return +} + +func @access_chain_2D_array_2(%arg0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + // CHECK: spv.AccessChain {{.*}}[{{.*}}] : !spv.ptr>, Function> + %1 = spv.AccessChain %0[%arg0] : !spv.ptr>, Function>, i32 + %2 = spv.Load "Function" %1 ["Volatile"] : !spv.array<4xf32> + return +} + +func @access_chain_rtarray(%arg0 : i32) -> () { + %0 = spv.Variable : !spv.ptr, Function> + // CHECK: spv.AccessChain {{.*}}[{{.*}}] : !spv.ptr, Function> + %1 = spv.AccessChain %0[%arg0] : !spv.ptr, Function>, i32 + %2 = spv.Load "Function" %1 ["Volatile"] : f32 + return +} + +// ----- + +func @access_chain_non_composite() -> () { + %0 = spv.constant 1: i32 + %1 = spv.Variable : !spv.ptr + // expected-error @+1 {{cannot extract from non-composite type 'f32' with index 0}} + %2 = spv.AccessChain %1[%0] : !spv.ptr, i32 + return +} + +// ----- + +func @access_chain_no_indices(%index0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + // expected-error @+1 {{expected at least one index}} + %1 = spv.AccessChain %0[] : !spv.ptr>, Function>, i32 + return +} + +// ----- + +func 
@access_chain_missing_comma(%index0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + // expected-error @+1 {{expected ','}} + %1 = spv.AccessChain %0[%index0] : !spv.ptr>, Function> i32 + return +} + +// ----- + +func @access_chain_invalid_indices_types_count(%index0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + // expected-error @+1 {{'spv.AccessChain' op indices types' count must be equal to indices info count}} + %1 = spv.AccessChain %0[%index0] : !spv.ptr>, Function>, i32, i32 + return +} + +// ----- + +func @access_chain_missing_indices_type(%index0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + // expected-error @+1 {{'spv.AccessChain' op indices types' count must be equal to indices info count}} + %1 = spv.AccessChain %0[%index0, %index0] : !spv.ptr>, Function>, i32 + return +} + +// ----- + +func @access_chain_invalid_type(%index0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + %1 = spv.Load "Function" %0 ["Volatile"] : !spv.array<4x!spv.array<4xf32>> + // expected-error @+1 {{expected a pointer to composite type, but provided '!spv.array<4 x !spv.array<4 x f32>>'}} + %2 = spv.AccessChain %1[%index0] : !spv.array<4x!spv.array<4xf32>>, i32 + return +} + +// ----- + +func @access_chain_invalid_index_1(%index0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + // expected-error @+1 {{expected SSA operand}} + %1 = spv.AccessChain %0[%index, 4] : !spv.ptr>, Function>, i32, i32 + return +} + +// ----- + +func @access_chain_invalid_index_2(%index0 : i32) -> () { + %0 = spv.Variable : !spv.ptr)>, Function> + // expected-error @+1 {{index must be an integer spv.constant to access element of spv.struct}} + %1 = spv.AccessChain %0[%index0, %index0] : !spv.ptr)>, Function>, i32, i32 + return +} + +// ----- + +func @access_chain_invalid_constant_type_1() -> () { + %0 = std.constant 1: i32 + %1 = spv.Variable : !spv.ptr)>, Function> + // expected-error @+1 {{index must be an integer spv.constant to access element 
of spv.struct, but provided std.constant}} + %2 = spv.AccessChain %1[%0, %0] : !spv.ptr)>, Function>, i32, i32 + return +} + +// ----- + +func @access_chain_out_of_bounds() -> () { + %index0 = "spv.constant"() { value = 12: i32} : () -> i32 + %0 = spv.Variable : !spv.ptr)>, Function> + // expected-error @+1 {{'spv.AccessChain' op index 12 out of bounds for '!spv.struct<(f32, !spv.array<4 x f32>)>'}} + %1 = spv.AccessChain %0[%index0, %index0] : !spv.ptr)>, Function>, i32, i32 + return +} + +// ----- + +func @access_chain_invalid_accessing_type(%index0 : i32) -> () { + %0 = spv.Variable : !spv.ptr>, Function> + // expected-error @+1 {{cannot extract from non-composite type 'f32' with index 0}} + %1 = spv.AccessChain %0[%index, %index0, %index0] : !spv.ptr>, Function>, i32, i32, i32 + return + +// ----- + +//===----------------------------------------------------------------------===// +// spv.LoadOp +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @simple_load +func @simple_load() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load "Function" %{{.*}} : f32 + %1 = spv.Load "Function" %0 : f32 + return +} + +// CHECK-LABEL: @load_none_access +func @load_none_access() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load "Function" %{{.*}} ["None"] : f32 + %1 = spv.Load "Function" %0 ["None"] : f32 + return +} + +// CHECK-LABEL: @volatile_load +func @volatile_load() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load "Function" %{{.*}} ["Volatile"] : f32 + %1 = spv.Load "Function" %0 ["Volatile"] : f32 + return +} + +// CHECK-LABEL: @aligned_load +func @aligned_load() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load "Function" %{{.*}} ["Aligned", 4] : f32 + %1 = spv.Load "Function" %0 ["Aligned", 4] : f32 + return +} + +// CHECK-LABEL: @volatile_aligned_load +func @volatile_aligned_load() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load "Function" %{{.*}} 
["Volatile|Aligned", 4] : f32 + %1 = spv.Load "Function" %0 ["Volatile|Aligned", 4] : f32 + return +} + +// ----- + +// CHECK-LABEL: load_none_access +func @load_none_access() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load + // CHECK-SAME: ["None"] + %1 = "spv.Load"(%0) {memory_access = 0 : i32} : (!spv.ptr) -> (f32) + return +} + +// CHECK-LABEL: volatile_load +func @volatile_load() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load + // CHECK-SAME: ["Volatile"] + %1 = "spv.Load"(%0) {memory_access = 1 : i32} : (!spv.ptr) -> (f32) + return +} + +// CHECK-LABEL: aligned_load +func @aligned_load() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load + // CHECK-SAME: ["Aligned", 4] + %1 = "spv.Load"(%0) {memory_access = 2 : i32, alignment = 4 : i32} : (!spv.ptr) -> (f32) + return +} + +// CHECK-LABEL: volatile_aligned_load +func @volatile_aligned_load() -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Load + // CHECK-SAME: ["Volatile|Aligned", 4] + %1 = "spv.Load"(%0) {memory_access = 3 : i32, alignment = 4 : i32} : (!spv.ptr) -> (f32) + return +} + +// ----- + +func @simple_load_missing_storageclass() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected non-function type}} + %1 = spv.Load %0 : f32 + return +} + +// ----- + +func @simple_load_missing_operand() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected SSA operand}} + %1 = spv.Load "Function" : f32 + return +} + +// ----- + +func @simple_load_missing_rettype() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+2 {{expected ':'}} + %1 = spv.Load "Function" %0 + return +} + +// ----- + +func @volatile_load_missing_lbrace() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ':'}} + %1 = spv.Load "Function" %0 "Volatile"] : f32 + return +} + +// ----- + +func @volatile_load_missing_rbrace() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ']'}} + %1 = spv.Load "Function" %0 
["Volatile"} : f32 + return +} + +// ----- + +func @aligned_load_missing_alignment() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ','}} + %1 = spv.Load "Function" %0 ["Aligned"] : f32 + return +} + +// ----- + +func @aligned_load_missing_comma() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ','}} + %1 = spv.Load "Function" %0 ["Aligned" 4] : f32 + return +} + +// ----- + +func @load_incorrect_attributes() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ']'}} + %1 = spv.Load "Function" %0 ["Volatile", 4] : f32 + return +} + +// ----- + +func @load_unknown_memory_access() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{custom op 'spv.Load' invalid memory_access attribute specification: "Something"}} + %1 = spv.Load "Function" %0 ["Something"] : f32 + return +} + +// ----- + +func @load_unknown_memory_access() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{custom op 'spv.Load' invalid memory_access attribute specification: "Volatile|Something"}} + %1 = spv.Load "Function" %0 ["Volatile|Something"] : f32 + return +} + +// ----- + +func @load_unknown_memory_access() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{failed to satisfy constraint: valid SPIR-V MemoryAccess}} + %1 = "spv.Load"(%0) {memory_access = 0x80000000 : i32} : (!spv.ptr) -> (f32) + return +} + +// ----- + +func @aligned_load_incorrect_attributes() -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ']'}} + %1 = spv.Load "Function" %0 ["Aligned", 4, 23] : f32 + return +} + +// ----- + +spv.module Logical GLSL450 { + spv.globalVariable @var0 : !spv.ptr + // CHECK_LABEL: @simple_load + spv.func @simple_load() -> () "None" { + // CHECK: spv.Load "Input" {{%.*}} : f32 + %0 = spv.mlir.addressof @var0 : !spv.ptr + %1 = spv.Load "Input" %0 : f32 + spv.Return + } +} + +// ----- + 
+//===----------------------------------------------------------------------===// +// spv.StoreOp +//===----------------------------------------------------------------------===// + +func @simple_store(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Store "Function" %0, %arg0 : f32 + spv.Store "Function" %0, %arg0 : f32 + return +} + +// CHECK_LABEL: @volatile_store +func @volatile_store(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Store "Function" %0, %arg0 ["Volatile"] : f32 + spv.Store "Function" %0, %arg0 ["Volatile"] : f32 + return +} + +// CHECK_LABEL: @aligned_store +func @aligned_store(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // CHECK: spv.Store "Function" %0, %arg0 ["Aligned", 4] : f32 + spv.Store "Function" %0, %arg0 ["Aligned", 4] : f32 + return +} + +// ----- + +func @simple_store_missing_ptr_type(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected non-function type}} + spv.Store %0, %arg0 : f32 + return +} + +// ----- + +func @simple_store_missing_operand(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{custom op 'spv.Store' invalid operand}} : f32 + spv.Store "Function" , %arg0 : f32 + return +} + +// ----- + +func @simple_store_missing_operand(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{custom op 'spv.Store' expected 2 operands}} : f32 + spv.Store "Function" %0 : f32 + return +} + +// ----- + +func @volatile_store_missing_lbrace(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ':'}} + spv.Store "Function" %0, %arg0 "Volatile"] : f32 + return +} + +// ----- + +func @volatile_store_missing_rbrace(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ']'}} + spv.Store "Function" %0, %arg0 ["Volatile"} : f32 + return +} + +// ----- + +func @aligned_store_missing_alignment(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // 
expected-error @+1 {{expected ','}} + spv.Store "Function" %0, %arg0 ["Aligned"] : f32 + return +} + +// ----- + +func @aligned_store_missing_comma(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ','}} + spv.Store "Function" %0, %arg0 ["Aligned" 4] : f32 + return +} + +// ----- + +func @load_incorrect_attributes(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ']'}} + spv.Store "Function" %0, %arg0 ["Volatile", 4] : f32 + return +} + +// ----- + +func @aligned_store_incorrect_attributes(%arg0 : f32) -> () { + %0 = spv.Variable : !spv.ptr + // expected-error @+1 {{expected ']'}} + spv.Store "Function" %0, %arg0 ["Aligned", 4, 23] : f32 + return +} + +// ----- + +spv.module Logical GLSL450 { + spv.globalVariable @var0 : !spv.ptr + spv.func @simple_store(%arg0 : f32) -> () "None" { + %0 = spv.mlir.addressof @var0 : !spv.ptr + // CHECK: spv.Store "Input" {{%.*}}, {{%.*}} : f32 + spv.Store "Input" %0, %arg0 : f32 + spv.Return + } +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.Variable +//===----------------------------------------------------------------------===// + +func @variable(%arg0: f32) -> () { + // CHECK: spv.Variable : !spv.ptr + %0 = spv.Variable : !spv.ptr + return +} + +// ----- + +func @variable_init_normal_constant() -> () { + %0 = spv.constant 4.0 : f32 + // CHECK: spv.Variable init(%0) : !spv.ptr + %1 = spv.Variable init(%0) : !spv.ptr + return +} + +// ----- + +spv.module Logical GLSL450 { + spv.globalVariable @global : !spv.ptr + spv.func @variable_init_global_variable() -> () "None" { + %0 = spv.mlir.addressof @global : !spv.ptr + // CHECK: spv.Variable init({{.*}}) : !spv.ptr, Function> + %1 = spv.Variable init(%0) : !spv.ptr, Function> + spv.Return + } +} + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc = 42 : i32 + // CHECK-LABEL: @variable_init_spec_constant + spv.func 
@variable_init_spec_constant() -> () "None" { + %0 = spv.mlir.referenceof @sc : i32 + // CHECK: spv.Variable init(%0) : !spv.ptr + %1 = spv.Variable init(%0) : !spv.ptr + spv.Return + } +} + +// ----- + +func @variable_bind() -> () { + // expected-error @+1 {{cannot have 'descriptor_set' attribute (only allowed in spv.globalVariable)}} + %0 = spv.Variable bind(1, 2) : !spv.ptr + return +} + +// ----- + +func @variable_init_bind() -> () { + %0 = spv.constant 4.0 : f32 + // expected-error @+1 {{cannot have 'binding' attribute (only allowed in spv.globalVariable)}} + %1 = spv.Variable init(%0) {binding = 5 : i32} : !spv.ptr + return +} + +// ----- + +func @variable_builtin() -> () { + // expected-error @+1 {{cannot have 'built_in' attribute (only allowed in spv.globalVariable)}} + %1 = spv.Variable built_in("GlobalInvocationID") : !spv.ptr, Function> + return +} + +// ----- + +func @expect_ptr_result_type(%arg0: f32) -> () { + // expected-error @+1 {{expected spv.ptr type}} + %0 = spv.Variable : f32 + return +} + +// ----- + +func @variable_init(%arg0: f32) -> () { + // expected-error @+1 {{op initializer must be the result of a constant or spv.globalVariable op}} + %0 = spv.Variable init(%arg0) : !spv.ptr + return +} + +// ----- + +func @cannot_be_generic_storage_class(%arg0: f32) -> () { + // expected-error @+1 {{op can only be used to model function-level variables. 
Use spv.globalVariable for module-level variables}} + %0 = spv.Variable : !spv.ptr + return +} + +// ----- + +func @copy_memory_incompatible_ptrs() { + %0 = spv.Variable : !spv.ptr + %1 = spv.Variable : !spv.ptr + // expected-error @+1 {{both operands must be pointers to the same type}} + "spv.CopyMemory"(%0, %1) {} : (!spv.ptr, !spv.ptr) -> () + spv.Return +} + +// ----- + +func @copy_memory_invalid_maa() { + %0 = spv.Variable : !spv.ptr + %1 = spv.Variable : !spv.ptr + // expected-error @+1 {{missing alignment value}} + "spv.CopyMemory"(%0, %1) {memory_access=0x0002 : i32} : (!spv.ptr, !spv.ptr) -> () + spv.Return +} + +// ----- + +func @copy_memory_invalid_source_maa() { + %0 = spv.Variable : !spv.ptr + %1 = spv.Variable : !spv.ptr + // expected-error @+1 {{invalid alignment specification with non-aligned memory access specification}} + "spv.CopyMemory"(%0, %1) {source_memory_access=0x0001 : i32, memory_access=0x0002 : i32, source_alignment=8 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () + spv.Return +} + +// ----- + +func @copy_memory_invalid_source_maa2() { + %0 = spv.Variable : !spv.ptr + %1 = spv.Variable : !spv.ptr + // expected-error @+1 {{missing alignment value}} + "spv.CopyMemory"(%0, %1) {source_memory_access=0x0002 : i32, memory_access=0x0002 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () + spv.Return +} + +// ----- + +func @copy_memory_print_maa() { + %0 = spv.Variable : !spv.ptr + %1 = spv.Variable : !spv.ptr + + // CHECK: spv.CopyMemory "Function" %{{.*}}, "Function" %{{.*}} ["Volatile"] : f32 + "spv.CopyMemory"(%0, %1) {memory_access=0x0001 : i32} : (!spv.ptr, !spv.ptr) -> () + + // CHECK: spv.CopyMemory "Function" %{{.*}}, "Function" %{{.*}} ["Aligned", 4] : f32 + "spv.CopyMemory"(%0, %1) {memory_access=0x0002 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () + + // CHECK: spv.CopyMemory "Function" %{{.*}}, "Function" %{{.*}} ["Aligned", 4], ["Volatile"] : f32 + "spv.CopyMemory"(%0, %1) {source_memory_access=0x0001 : i32, 
memory_access=0x0002 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () + + // CHECK: spv.CopyMemory "Function" %{{.*}}, "Function" %{{.*}} ["Aligned", 4], ["Aligned", 8] : f32 + "spv.CopyMemory"(%0, %1) {source_memory_access=0x0002 : i32, memory_access=0x0002 : i32, source_alignment=8 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () + + spv.Return +} diff --git a/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir b/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir new file mode 100644 index 00000000000000..537dccee0b447c --- /dev/null +++ b/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir @@ -0,0 +1,29 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s + +//===----------------------------------------------------------------------===// +// spv.undef +//===----------------------------------------------------------------------===// + +func @undef() -> () { + // CHECK: %{{.*}} = spv.undef : f32 + %0 = spv.undef : f32 + // CHECK: %{{.*}} = spv.undef : vector<4xf32> + %1 = spv.undef : vector<4xf32> + spv.Return +} + +// ----- + +func @undef() -> () { + // expected-error @+2{{expected non-function type}} + %0 = spv.undef : + spv.Return +} + +// ----- + +func @undef() -> () { + // expected-error @+2{{expected ':'}} + %0 = spv.undef + spv.Return +} diff --git a/mlir/test/Dialect/SPIRV/IR/ops.mlir b/mlir/test/Dialect/SPIRV/IR/ops.mlir deleted file mode 100644 index 92cca0122cf717..00000000000000 --- a/mlir/test/Dialect/SPIRV/IR/ops.mlir +++ /dev/null @@ -1,1355 +0,0 @@ -// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -verify-diagnostics %s | FileCheck %s - -//===----------------------------------------------------------------------===// -// spv.AccessChain -//===----------------------------------------------------------------------===// - -func @access_chain_struct() -> () { - %0 = spv.constant 1: i32 - %1 = spv.Variable : !spv.ptr)>, Function> - // CHECK: spv.AccessChain {{.*}}[{{.*}}, {{.*}}] : !spv.ptr)>, Function> - %2 = spv.AccessChain %1[%0, 
%0] : !spv.ptr)>, Function>, i32, i32 - return -} - -func @access_chain_1D_array(%arg0 : i32) -> () { - %0 = spv.Variable : !spv.ptr, Function> - // CHECK: spv.AccessChain {{.*}}[{{.*}}] : !spv.ptr, Function> - %1 = spv.AccessChain %0[%arg0] : !spv.ptr, Function>, i32 - return -} - -func @access_chain_2D_array_1(%arg0 : i32) -> () { - %0 = spv.Variable : !spv.ptr>, Function> - // CHECK: spv.AccessChain {{.*}}[{{.*}}, {{.*}}] : !spv.ptr>, Function> - %1 = spv.AccessChain %0[%arg0, %arg0] : !spv.ptr>, Function>, i32, i32 - %2 = spv.Load "Function" %1 ["Volatile"] : f32 - return -} - -func @access_chain_2D_array_2(%arg0 : i32) -> () { - %0 = spv.Variable : !spv.ptr>, Function> - // CHECK: spv.AccessChain {{.*}}[{{.*}}] : !spv.ptr>, Function> - %1 = spv.AccessChain %0[%arg0] : !spv.ptr>, Function>, i32 - %2 = spv.Load "Function" %1 ["Volatile"] : !spv.array<4xf32> - return -} - -func @access_chain_rtarray(%arg0 : i32) -> () { - %0 = spv.Variable : !spv.ptr, Function> - // CHECK: spv.AccessChain {{.*}}[{{.*}}] : !spv.ptr, Function> - %1 = spv.AccessChain %0[%arg0] : !spv.ptr, Function>, i32 - %2 = spv.Load "Function" %1 ["Volatile"] : f32 - return -} - -// ----- - -func @access_chain_non_composite() -> () { - %0 = spv.constant 1: i32 - %1 = spv.Variable : !spv.ptr - // expected-error @+1 {{cannot extract from non-composite type 'f32' with index 0}} - %2 = spv.AccessChain %1[%0] : !spv.ptr, i32 - return -} - -// ----- - -func @access_chain_no_indices(%index0 : i32) -> () { - %0 = spv.Variable : !spv.ptr>, Function> - // expected-error @+1 {{expected at least one index}} - %1 = spv.AccessChain %0[] : !spv.ptr>, Function>, i32 - return -} - -// ----- - -func @access_chain_missing_comma(%index0 : i32) -> () { - %0 = spv.Variable : !spv.ptr>, Function> - // expected-error @+1 {{expected ','}} - %1 = spv.AccessChain %0[%index0] : !spv.ptr>, Function> i32 - return -} - -// ----- - -func @access_chain_invalid_indices_types_count(%index0 : i32) -> () { - %0 = spv.Variable : 
!spv.ptr>, Function> - // expected-error @+1 {{'spv.AccessChain' op indices types' count must be equal to indices info count}} - %1 = spv.AccessChain %0[%index0] : !spv.ptr>, Function>, i32, i32 - return -} - -// ----- - -func @access_chain_missing_indices_type(%index0 : i32) -> () { - %0 = spv.Variable : !spv.ptr>, Function> - // expected-error @+1 {{'spv.AccessChain' op indices types' count must be equal to indices info count}} - %1 = spv.AccessChain %0[%index0, %index0] : !spv.ptr>, Function>, i32 - return -} - -// ----- - -func @access_chain_invalid_type(%index0 : i32) -> () { - %0 = spv.Variable : !spv.ptr>, Function> - %1 = spv.Load "Function" %0 ["Volatile"] : !spv.array<4x!spv.array<4xf32>> - // expected-error @+1 {{expected a pointer to composite type, but provided '!spv.array<4 x !spv.array<4 x f32>>'}} - %2 = spv.AccessChain %1[%index0] : !spv.array<4x!spv.array<4xf32>>, i32 - return -} - -// ----- - -func @access_chain_invalid_index_1(%index0 : i32) -> () { - %0 = spv.Variable : !spv.ptr>, Function> - // expected-error @+1 {{expected SSA operand}} - %1 = spv.AccessChain %0[%index, 4] : !spv.ptr>, Function>, i32, i32 - return -} - -// ----- - -func @access_chain_invalid_index_2(%index0 : i32) -> () { - %0 = spv.Variable : !spv.ptr)>, Function> - // expected-error @+1 {{index must be an integer spv.constant to access element of spv.struct}} - %1 = spv.AccessChain %0[%index0, %index0] : !spv.ptr)>, Function>, i32, i32 - return -} - -// ----- - -func @access_chain_invalid_constant_type_1() -> () { - %0 = std.constant 1: i32 - %1 = spv.Variable : !spv.ptr)>, Function> - // expected-error @+1 {{index must be an integer spv.constant to access element of spv.struct, but provided std.constant}} - %2 = spv.AccessChain %1[%0, %0] : !spv.ptr)>, Function>, i32, i32 - return -} - -// ----- - -func @access_chain_out_of_bounds() -> () { - %index0 = "spv.constant"() { value = 12: i32} : () -> i32 - %0 = spv.Variable : !spv.ptr)>, Function> - // expected-error @+1 
{{'spv.AccessChain' op index 12 out of bounds for '!spv.struct<(f32, !spv.array<4 x f32>)>'}} - %1 = spv.AccessChain %0[%index0, %index0] : !spv.ptr)>, Function>, i32, i32 - return -} - -// ----- - -func @access_chain_invalid_accessing_type(%index0 : i32) -> () { - %0 = spv.Variable : !spv.ptr>, Function> - // expected-error @+1 {{cannot extract from non-composite type 'f32' with index 0}} - %1 = spv.AccessChain %0[%index, %index0, %index0] : !spv.ptr>, Function>, i32, i32, i32 - return - -// ----- - -//===----------------------------------------------------------------------===// -// spv.Bitcast -//===----------------------------------------------------------------------===// - -func @cast1(%arg0 : f32) { - // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : f32 to i32 - %0 = spv.Bitcast %arg0 : f32 to i32 - return -} - -func @cast2(%arg0 : vector<2xf32>) { - // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : vector<2xf32> to vector<2xi32> - %0 = spv.Bitcast %arg0 : vector<2xf32> to vector<2xi32> - return -} - -func @cast3(%arg0 : vector<2xf32>) { - // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : vector<2xf32> to i64 - %0 = spv.Bitcast %arg0 : vector<2xf32> to i64 - return -} - -func @cast4(%arg0 : !spv.ptr) { - // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : !spv.ptr to !spv.ptr - %0 = spv.Bitcast %arg0 : !spv.ptr to !spv.ptr - return -} - -func @cast5(%arg0 : !spv.ptr) { - // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : !spv.ptr to !spv.ptr, Function> - %0 = spv.Bitcast %arg0 : !spv.ptr to !spv.ptr, Function> - return -} - -func @cast6(%arg0 : vector<4xf32>) { - // CHECK: {{%.*}} = spv.Bitcast {{%.*}} : vector<4xf32> to vector<2xi64> - %0 = spv.Bitcast %arg0 : vector<4xf32> to vector<2xi64> - return -} - -// ----- - -func @cast1(%arg0 : f32) { - // expected-error @+1 {{result type must be different from operand type}} - %0 = spv.Bitcast %arg0 : f32 to f32 - return -} - -// ----- - -func @cast1(%arg0 : f32) { - // expected-error @+1 {{mismatch in result type bitwidth 64 and operand type bitwidth 
32}} - %0 = spv.Bitcast %arg0 : f32 to i64 - return -} - -// ----- - -func @cast1(%arg0 : vector<2xf32>) { - // expected-error @+1 {{mismatch in result type bitwidth 96 and operand type bitwidth 64}} - %0 = spv.Bitcast %arg0 : vector<2xf32> to vector<3xf32> - return -} - -// ----- - -func @cast3(%arg0 : !spv.ptr) { - // expected-error @+1 {{unhandled bit cast conversion from pointer type to non-pointer type}} - %0 = spv.Bitcast %arg0 : !spv.ptr to i64 - return -} - -// ----- - -func @cast3(%arg0 : i64) { - // expected-error @+1 {{unhandled bit cast conversion from non-pointer type to pointer type}} - %0 = spv.Bitcast %arg0 : i64 to !spv.ptr - return -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.BitCount -//===----------------------------------------------------------------------===// - -func @bitcount(%arg: i32) -> i32 { - // CHECK: spv.BitCount {{%.*}} : i32 - %0 = spv.BitCount %arg : i32 - spv.ReturnValue %0 : i32 -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.BitFieldInsert -//===----------------------------------------------------------------------===// - -func @bit_field_insert_vec(%base: vector<3xi32>, %insert: vector<3xi32>, %offset: i32, %count: i16) -> vector<3xi32> { - // CHECK: {{%.*}} = spv.BitFieldInsert {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}} : vector<3xi32>, i32, i16 - %0 = spv.BitFieldInsert %base, %insert, %offset, %count : vector<3xi32>, i32, i16 - spv.ReturnValue %0 : vector<3xi32> -} - -// ----- - -func @bit_field_insert_invalid_insert_type(%base: vector<3xi32>, %insert: vector<2xi32>, %offset: i32, %count: i16) -> vector<3xi32> { - // expected-error @+1 {{all of {base, insert, result} have same type}} - %0 = "spv.BitFieldInsert" (%base, %insert, %offset, %count) : (vector<3xi32>, vector<2xi32>, i32, i16) -> vector<3xi32> - spv.ReturnValue %0 : vector<3xi32> -} - -// ----- - 
-//===----------------------------------------------------------------------===// -// spv.BitFieldSExtract -//===----------------------------------------------------------------------===// - -func @bit_field_s_extract_vec(%base: vector<3xi32>, %offset: i8, %count: i8) -> vector<3xi32> { - // CHECK: {{%.*}} = spv.BitFieldSExtract {{%.*}}, {{%.*}}, {{%.*}} : vector<3xi32>, i8, i8 - %0 = spv.BitFieldSExtract %base, %offset, %count : vector<3xi32>, i8, i8 - spv.ReturnValue %0 : vector<3xi32> -} - -//===----------------------------------------------------------------------===// -// spv.BitFieldUExtract -//===----------------------------------------------------------------------===// - -func @bit_field_u_extract_vec(%base: vector<3xi32>, %offset: i8, %count: i8) -> vector<3xi32> { - // CHECK: {{%.*}} = spv.BitFieldUExtract {{%.*}}, {{%.*}}, {{%.*}} : vector<3xi32>, i8, i8 - %0 = spv.BitFieldUExtract %base, %offset, %count : vector<3xi32>, i8, i8 - spv.ReturnValue %0 : vector<3xi32> -} - -// ----- - -func @bit_field_u_extract_invalid_result_type(%base: vector<3xi32>, %offset: i32, %count: i16) -> vector<4xi32> { - // expected-error @+1 {{failed to verify that all of {base, result} have same type}} - %0 = "spv.BitFieldUExtract" (%base, %offset, %count) : (vector<3xi32>, i32, i16) -> vector<4xi32> - spv.ReturnValue %0 : vector<4xi32> -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.BitReverse -//===----------------------------------------------------------------------===// - -func @bitreverse(%arg: i32) -> i32 { - // CHECK: spv.BitReverse {{%.*}} : i32 - %0 = spv.BitReverse %arg : i32 - spv.ReturnValue %0 : i32 -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.ControlBarrier -//===----------------------------------------------------------------------===// - -func @control_barrier_0() -> () { - // CHECK: spv.ControlBarrier "Workgroup", "Device", 
"Acquire|UniformMemory" - spv.ControlBarrier "Workgroup", "Device", "Acquire|UniformMemory" - return -} - -// ----- - -func @control_barrier_1() -> () { - // expected-error @+1 {{invalid execution_scope attribute specification: "Something"}} - spv.ControlBarrier "Something", "Device", "Acquire|UniformMemory" - return -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.ConvertFToS -//===----------------------------------------------------------------------===// - -func @convert_f_to_s_scalar(%arg0 : f32) -> i32 { - // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : f32 to i32 - %0 = spv.ConvertFToS %arg0 : f32 to i32 - spv.ReturnValue %0 : i32 -} - -// ----- - -func @convert_f64_to_s32_scalar(%arg0 : f64) -> i32 { - // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : f64 to i32 - %0 = spv.ConvertFToS %arg0 : f64 to i32 - spv.ReturnValue %0 : i32 -} - -// ----- - -func @convert_f_to_s_vector(%arg0 : vector<3xf32>) -> vector<3xi32> { - // CHECK: {{%.*}} = spv.ConvertFToS {{%.*}} : vector<3xf32> to vector<3xi32> - %0 = spv.ConvertFToS %arg0 : vector<3xf32> to vector<3xi32> - spv.ReturnValue %0 : vector<3xi32> -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.ConvertFToU -//===----------------------------------------------------------------------===// - -func @convert_f_to_u_scalar(%arg0 : f32) -> i32 { - // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : f32 to i32 - %0 = spv.ConvertFToU %arg0 : f32 to i32 - spv.ReturnValue %0 : i32 -} - -// ----- - -func @convert_f64_to_u32_scalar(%arg0 : f64) -> i32 { - // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : f64 to i32 - %0 = spv.ConvertFToU %arg0 : f64 to i32 - spv.ReturnValue %0 : i32 -} - -// ----- - -func @convert_f_to_u_vector(%arg0 : vector<3xf32>) -> vector<3xi32> { - // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : vector<3xf32> to vector<3xi32> - %0 = spv.ConvertFToU %arg0 : vector<3xf32> to vector<3xi32> - spv.ReturnValue %0 
: vector<3xi32> -} - -// ----- - -func @convert_f_to_u_coopmatrix(%arg0 : !spv.coopmatrix<8x16xf32, Subgroup>) { - // CHECK: {{%.*}} = spv.ConvertFToU {{%.*}} : !spv.coopmatrix<8x16xf32, Subgroup> to !spv.coopmatrix<8x16xi32, Subgroup> - %0 = spv.ConvertFToU %arg0 : !spv.coopmatrix<8x16xf32, Subgroup> to !spv.coopmatrix<8x16xi32, Subgroup> - spv.Return -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.ConvertSToF -//===----------------------------------------------------------------------===// - -func @convert_s_to_f_scalar(%arg0 : i32) -> f32 { - // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : i32 to f32 - %0 = spv.ConvertSToF %arg0 : i32 to f32 - spv.ReturnValue %0 : f32 -} - -// ----- - -func @convert_s64_to_f32_scalar(%arg0 : i64) -> f32 { - // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : i64 to f32 - %0 = spv.ConvertSToF %arg0 : i64 to f32 - spv.ReturnValue %0 : f32 -} - -// ----- - -func @convert_s_to_f_vector(%arg0 : vector<3xi32>) -> vector<3xf32> { - // CHECK: {{%.*}} = spv.ConvertSToF {{%.*}} : vector<3xi32> to vector<3xf32> - %0 = spv.ConvertSToF %arg0 : vector<3xi32> to vector<3xf32> - spv.ReturnValue %0 : vector<3xf32> -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.ConvertUToF -//===----------------------------------------------------------------------===// - -func @convert_u_to_f_scalar(%arg0 : i32) -> f32 { - // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : i32 to f32 - %0 = spv.ConvertUToF %arg0 : i32 to f32 - spv.ReturnValue %0 : f32 -} - -// ----- - -func @convert_u64_to_f32_scalar(%arg0 : i64) -> f32 { - // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : i64 to f32 - %0 = spv.ConvertUToF %arg0 : i64 to f32 - spv.ReturnValue %0 : f32 -} - -// ----- - -func @convert_u_to_f_vector(%arg0 : vector<3xi32>) -> vector<3xf32> { - // CHECK: {{%.*}} = spv.ConvertUToF {{%.*}} : vector<3xi32> to vector<3xf32> - %0 = spv.ConvertUToF %arg0 : vector<3xi32> to 
vector<3xf32> - spv.ReturnValue %0 : vector<3xf32> -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.FConvert -//===----------------------------------------------------------------------===// - -func @f_convert_scalar(%arg0 : f32) -> f64 { - // CHECK: {{%.*}} = spv.FConvert {{%.*}} : f32 to f64 - %0 = spv.FConvert %arg0 : f32 to f64 - spv.ReturnValue %0 : f64 -} - -// ----- - -func @f_convert_vector(%arg0 : vector<3xf32>) -> vector<3xf64> { - // CHECK: {{%.*}} = spv.FConvert {{%.*}} : vector<3xf32> to vector<3xf64> - %0 = spv.FConvert %arg0 : vector<3xf32> to vector<3xf64> - spv.ReturnValue %0 : vector<3xf64> -} - -// ----- - -func @f_convert_coop_matrix(%arg0 : !spv.coopmatrix<8x16xf32, Subgroup>) { - // CHECK: {{%.*}} = spv.FConvert {{%.*}} : !spv.coopmatrix<8x16xf32, Subgroup> to !spv.coopmatrix<8x16xf64, Subgroup> - %0 = spv.FConvert %arg0 : !spv.coopmatrix<8x16xf32, Subgroup> to !spv.coopmatrix<8x16xf64, Subgroup> - spv.Return -} - -// ----- - -func @f_convert_vector(%arg0 : f32) -> f32 { - // expected-error @+1 {{expected the different bit widths for operand type and result type, but provided 'f32' and 'f32'}} - %0 = spv.FConvert %arg0 : f32 to f32 - spv.ReturnValue %0 : f32 -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.SConvert -//===----------------------------------------------------------------------===// - -func @s_convert_scalar(%arg0 : i32) -> i64 { - // CHECK: {{%.*}} = spv.SConvert {{%.*}} : i32 to i64 - %0 = spv.SConvert %arg0 : i32 to i64 - spv.ReturnValue %0 : i64 -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.UConvert -//===----------------------------------------------------------------------===// - -func @u_convert_scalar(%arg0 : i32) -> i64 { - // CHECK: {{%.*}} = spv.UConvert {{%.*}} : i32 to i64 - %0 = spv.UConvert %arg0 : i32 to i64 - spv.ReturnValue %0 : i64 -} - 
-// ----- - -//===----------------------------------------------------------------------===// -// spv.ExecutionMode -//===----------------------------------------------------------------------===// - -spv.module Logical GLSL450 { - spv.func @do_nothing() -> () "None" { - spv.Return - } - spv.EntryPoint "GLCompute" @do_nothing - // CHECK: spv.ExecutionMode {{@.*}} "ContractionOff" - spv.ExecutionMode @do_nothing "ContractionOff" -} - -spv.module Logical GLSL450 { - spv.func @do_nothing() -> () "None" { - spv.Return - } - spv.EntryPoint "GLCompute" @do_nothing - // CHECK: spv.ExecutionMode {{@.*}} "LocalSizeHint", 3, 4, 5 - spv.ExecutionMode @do_nothing "LocalSizeHint", 3, 4, 5 -} - -// ----- - -spv.module Logical GLSL450 { - spv.func @do_nothing() -> () "None" { - spv.Return - } - spv.EntryPoint "GLCompute" @do_nothing - // expected-error @+1 {{custom op 'spv.ExecutionMode' invalid execution_mode attribute specification: "GLCompute"}} - spv.ExecutionMode @do_nothing "GLCompute", 3, 4, 5 -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.LoadOp -//===----------------------------------------------------------------------===// - -// CHECK-LABEL: @simple_load -func @simple_load() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load "Function" %{{.*}} : f32 - %1 = spv.Load "Function" %0 : f32 - return -} - -// CHECK-LABEL: @load_none_access -func @load_none_access() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load "Function" %{{.*}} ["None"] : f32 - %1 = spv.Load "Function" %0 ["None"] : f32 - return -} - -// CHECK-LABEL: @volatile_load -func @volatile_load() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load "Function" %{{.*}} ["Volatile"] : f32 - %1 = spv.Load "Function" %0 ["Volatile"] : f32 - return -} - -// CHECK-LABEL: @aligned_load -func @aligned_load() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load "Function" %{{.*}} ["Aligned", 4] : f32 - %1 = spv.Load "Function" %0 
["Aligned", 4] : f32 - return -} - -// CHECK-LABEL: @volatile_aligned_load -func @volatile_aligned_load() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load "Function" %{{.*}} ["Volatile|Aligned", 4] : f32 - %1 = spv.Load "Function" %0 ["Volatile|Aligned", 4] : f32 - return -} - -// ----- - -// CHECK-LABEL: load_none_access -func @load_none_access() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load - // CHECK-SAME: ["None"] - %1 = "spv.Load"(%0) {memory_access = 0 : i32} : (!spv.ptr) -> (f32) - return -} - -// CHECK-LABEL: volatile_load -func @volatile_load() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load - // CHECK-SAME: ["Volatile"] - %1 = "spv.Load"(%0) {memory_access = 1 : i32} : (!spv.ptr) -> (f32) - return -} - -// CHECK-LABEL: aligned_load -func @aligned_load() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load - // CHECK-SAME: ["Aligned", 4] - %1 = "spv.Load"(%0) {memory_access = 2 : i32, alignment = 4 : i32} : (!spv.ptr) -> (f32) - return -} - -// CHECK-LABEL: volatile_aligned_load -func @volatile_aligned_load() -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Load - // CHECK-SAME: ["Volatile|Aligned", 4] - %1 = "spv.Load"(%0) {memory_access = 3 : i32, alignment = 4 : i32} : (!spv.ptr) -> (f32) - return -} - -// ----- - -func @simple_load_missing_storageclass() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected non-function type}} - %1 = spv.Load %0 : f32 - return -} - -// ----- - -func @simple_load_missing_operand() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected SSA operand}} - %1 = spv.Load "Function" : f32 - return -} - -// ----- - -func @simple_load_missing_rettype() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+2 {{expected ':'}} - %1 = spv.Load "Function" %0 - return -} - -// ----- - -func @volatile_load_missing_lbrace() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ':'}} - %1 = spv.Load "Function" %0 "Volatile"] 
: f32 - return -} - -// ----- - -func @volatile_load_missing_rbrace() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ']'}} - %1 = spv.Load "Function" %0 ["Volatile"} : f32 - return -} - -// ----- - -func @aligned_load_missing_alignment() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ','}} - %1 = spv.Load "Function" %0 ["Aligned"] : f32 - return -} - -// ----- - -func @aligned_load_missing_comma() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ','}} - %1 = spv.Load "Function" %0 ["Aligned" 4] : f32 - return -} - -// ----- - -func @load_incorrect_attributes() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ']'}} - %1 = spv.Load "Function" %0 ["Volatile", 4] : f32 - return -} - -// ----- - -func @load_unknown_memory_access() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{custom op 'spv.Load' invalid memory_access attribute specification: "Something"}} - %1 = spv.Load "Function" %0 ["Something"] : f32 - return -} - -// ----- - -func @load_unknown_memory_access() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{custom op 'spv.Load' invalid memory_access attribute specification: "Volatile|Something"}} - %1 = spv.Load "Function" %0 ["Volatile|Something"] : f32 - return -} - -// ----- - -func @load_unknown_memory_access() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{failed to satisfy constraint: valid SPIR-V MemoryAccess}} - %1 = "spv.Load"(%0) {memory_access = 0x80000000 : i32} : (!spv.ptr) -> (f32) - return -} - -// ----- - -func @aligned_load_incorrect_attributes() -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ']'}} - %1 = spv.Load "Function" %0 ["Aligned", 4, 23] : f32 - return -} - -// ----- - -spv.module Logical GLSL450 { - spv.globalVariable @var0 : !spv.ptr - // CHECK_LABEL: @simple_load - spv.func @simple_load() -> () "None" { - // CHECK: spv.Load "Input" {{%.*}} : f32 - %0 = 
spv.mlir.addressof @var0 : !spv.ptr - %1 = spv.Load "Input" %0 : f32 - spv.Return - } -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.LogicalAnd -//===----------------------------------------------------------------------===// - -func @logicalBinary(%arg0 : i1, %arg1 : i1, %arg2 : i1) -{ - // CHECK: [[TMP:%.*]] = spv.LogicalAnd {{%.*}}, {{%.*}} : i1 - %0 = spv.LogicalAnd %arg0, %arg1 : i1 - // CHECK: {{%.*}} = spv.LogicalAnd [[TMP]], {{%.*}} : i1 - %1 = spv.LogicalAnd %0, %arg2 : i1 - return -} - -func @logicalBinary2(%arg0 : vector<4xi1>, %arg1 : vector<4xi1>) -{ - // CHECK: {{%.*}} = spv.LogicalAnd {{%.*}}, {{%.*}} : vector<4xi1> - %0 = spv.LogicalAnd %arg0, %arg1 : vector<4xi1> - return -} - -// ----- - -func @logicalBinary(%arg0 : i1, %arg1 : i1) -{ - // expected-error @+2 {{expected ':'}} - %0 = spv.LogicalAnd %arg0, %arg1 - return -} - -// ----- - -func @logicalBinary(%arg0 : i1, %arg1 : i1) -{ - // expected-error @+2 {{expected non-function type}} - %0 = spv.LogicalAnd %arg0, %arg1 : - return -} - -// ----- - -func @logicalBinary(%arg0 : i1, %arg1 : i1) -{ - // expected-error @+1 {{custom op 'spv.LogicalAnd' expected 2 operands}} - %0 = spv.LogicalAnd %arg0 : i1 - return -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.LogicalNot -//===----------------------------------------------------------------------===// - -func @logicalUnary(%arg0 : i1, %arg1 : i1) -{ - // CHECK: [[TMP:%.*]] = spv.LogicalNot {{%.*}} : i1 - %0 = spv.LogicalNot %arg0 : i1 - // CHECK: {{%.*}} = spv.LogicalNot [[TMP]] : i1 - %1 = spv.LogicalNot %0 : i1 - return -} - -func @logicalUnary2(%arg0 : vector<4xi1>) -{ - // CHECK: {{%.*}} = spv.LogicalNot {{%.*}} : vector<4xi1> - %0 = spv.LogicalNot %arg0 : vector<4xi1> - return -} - -// ----- - -func @logicalUnary(%arg0 : i1) -{ - // expected-error @+2 {{expected ':'}} - %0 = spv.LogicalNot %arg0 - return -} - -// ----- - -func 
@logicalUnary(%arg0 : i1) -{ - // expected-error @+2 {{expected non-function type}} - %0 = spv.LogicalNot %arg0 : - return -} - -// ----- - -func @logicalUnary(%arg0 : i1) -{ - // expected-error @+1 {{expected SSA operand}} - %0 = spv.LogicalNot : i1 - return -} - -// ----- - -func @logicalUnary(%arg0 : i32) -{ - // expected-error @+1 {{operand #0 must be bool or vector of bool values of length 2/3/4/8/16, but got 'i32'}} - %0 = spv.LogicalNot %arg0 : i32 - return -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.MemoryBarrier -//===----------------------------------------------------------------------===// - -func @memory_barrier_0() -> () { - // CHECK: spv.MemoryBarrier "Device", "Acquire|UniformMemory" - spv.MemoryBarrier "Device", "Acquire|UniformMemory" - return -} - -// ----- - -func @memory_barrier_1() -> () { - // CHECK: spv.MemoryBarrier "Workgroup", "Acquire" - spv.MemoryBarrier "Workgroup", "Acquire" - return -} - -// ----- - -func @memory_barrier_2() -> () { - // expected-error @+1 {{expected at most one of these four memory constraints to be set: `Acquire`, `Release`,`AcquireRelease` or `SequentiallyConsistent`}} - spv.MemoryBarrier "Device", "Acquire|Release" - return -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.Not -//===----------------------------------------------------------------------===// - -func @not(%arg: i32) -> i32 { - // CHECK: spv.Not {{%.*}} : i32 - %0 = spv.Not %arg : i32 - spv.ReturnValue %0 : i32 -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.SelectOp -//===----------------------------------------------------------------------===// - -func @select_op_bool(%arg0: i1) -> () { - %0 = spv.constant true - %1 = spv.constant false - // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, i1 - %2 = spv.Select %arg0, %0, %1 : i1, i1 - return -} - -func 
@select_op_int(%arg0: i1) -> () { - %0 = spv.constant 2 : i32 - %1 = spv.constant 3 : i32 - // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, i32 - %2 = spv.Select %arg0, %0, %1 : i1, i32 - return -} - -func @select_op_float(%arg0: i1) -> () { - %0 = spv.constant 2.0 : f32 - %1 = spv.constant 3.0 : f32 - // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, f32 - %2 = spv.Select %arg0, %0, %1 : i1, f32 - return -} - -func @select_op_ptr(%arg0: i1) -> () { - %0 = spv.Variable : !spv.ptr - %1 = spv.Variable : !spv.ptr - // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, !spv.ptr - %2 = spv.Select %arg0, %0, %1 : i1, !spv.ptr - return -} - -func @select_op_vec(%arg0: i1) -> () { - %0 = spv.constant dense<[2.0, 3.0, 4.0]> : vector<3xf32> - %1 = spv.constant dense<[5.0, 6.0, 7.0]> : vector<3xf32> - // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : i1, vector<3xf32> - %2 = spv.Select %arg0, %0, %1 : i1, vector<3xf32> - return -} - -func @select_op_vec_condn_vec(%arg0: vector<3xi1>) -> () { - %0 = spv.constant dense<[2.0, 3.0, 4.0]> : vector<3xf32> - %1 = spv.constant dense<[5.0, 6.0, 7.0]> : vector<3xf32> - // CHECK : spv.Select {{%.*}}, {{%.*}}, {{%.*}} : vector<3xi1>, vector<3xf32> - %2 = spv.Select %arg0, %0, %1 : vector<3xi1>, vector<3xf32> - return -} - -// ----- - -func @select_op(%arg0: i1) -> () { - %0 = spv.constant 2 : i32 - %1 = spv.constant 3 : i32 - // expected-error @+2 {{expected ','}} - %2 = spv.Select %arg0, %0, %1 : i1 - return -} - -// ----- - -func @select_op(%arg1: vector<3xi1>) -> () { - %0 = spv.constant 2 : i32 - %1 = spv.constant 3 : i32 - // expected-error @+1 {{result expected to be of vector type when condition is of vector type}} - %2 = spv.Select %arg1, %0, %1 : vector<3xi1>, i32 - return -} - -// ----- - -func @select_op(%arg1: vector<4xi1>) -> () { - %0 = spv.constant dense<[2, 3, 4]> : vector<3xi32> - %1 = spv.constant dense<[5, 6, 7]> : vector<3xi32> - // expected-error @+1 {{result should have the same number of elements as the 
condition when condition is of vector type}} - %2 = spv.Select %arg1, %0, %1 : vector<4xi1>, vector<3xi32> - return -} - -// ----- - -func @select_op(%arg1: vector<4xi1>) -> () { - %0 = spv.constant dense<[2.0, 3.0, 4.0]> : vector<3xf32> - %1 = spv.constant dense<[5, 6, 7]> : vector<3xi32> - // expected-error @+1 {{all of {true_value, false_value, result} have same type}} - %2 = "spv.Select"(%arg1, %0, %1) : (vector<4xi1>, vector<3xf32>, vector<3xi32>) -> vector<3xi32> - return -} - -// ----- - -func @select_op(%arg1: vector<4xi1>) -> () { - %0 = spv.constant dense<[2.0, 3.0, 4.0]> : vector<3xf32> - %1 = spv.constant dense<[5, 6, 7]> : vector<3xi32> - // expected-error @+1 {{all of {true_value, false_value, result} have same type}} - %2 = "spv.Select"(%arg1, %1, %0) : (vector<4xi1>, vector<3xi32>, vector<3xf32>) -> vector<3xi32> - return -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.ShiftLeftLogical -//===----------------------------------------------------------------------===// - -func @shift_left_logical(%arg0: i32, %arg1 : i16) -> i32 { - // CHECK: {{%.*}} = spv.ShiftLeftLogical {{%.*}}, {{%.*}} : i32, i16 - %0 = spv.ShiftLeftLogical %arg0, %arg1: i32, i16 - spv.ReturnValue %0 : i32 -} - -// ----- - -func @shift_left_logical_invalid_result_type(%arg0: i32, %arg1 : i16) -> i16 { - // expected-error @+1 {{expected the same type for the first operand and result, but provided 'i32' and 'i16'}} - %0 = "spv.ShiftLeftLogical" (%arg0, %arg1) : (i32, i16) -> (i16) - spv.ReturnValue %0 : i16 -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.ShiftRightArithmetic -//===----------------------------------------------------------------------===// - -func @shift_right_arithmetic(%arg0: vector<4xi32>, %arg1 : vector<4xi8>) -> vector<4xi32> { - // CHECK: {{%.*}} = spv.ShiftRightArithmetic {{%.*}}, {{%.*}} : vector<4xi32>, vector<4xi8> - %0 = 
spv.ShiftRightArithmetic %arg0, %arg1: vector<4xi32>, vector<4xi8> - spv.ReturnValue %0 : vector<4xi32> -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.ShiftRightLogical -//===----------------------------------------------------------------------===// - -func @shift_right_logical(%arg0: vector<2xi32>, %arg1 : vector<2xi8>) -> vector<2xi32> { - // CHECK: {{%.*}} = spv.ShiftRightLogical {{%.*}}, {{%.*}} : vector<2xi32>, vector<2xi8> - %0 = spv.ShiftRightLogical %arg0, %arg1: vector<2xi32>, vector<2xi8> - spv.ReturnValue %0 : vector<2xi32> -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.StoreOp -//===----------------------------------------------------------------------===// - -func @simple_store(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Store "Function" %0, %arg0 : f32 - spv.Store "Function" %0, %arg0 : f32 - return -} - -// CHECK_LABEL: @volatile_store -func @volatile_store(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Store "Function" %0, %arg0 ["Volatile"] : f32 - spv.Store "Function" %0, %arg0 ["Volatile"] : f32 - return -} - -// CHECK_LABEL: @aligned_store -func @aligned_store(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // CHECK: spv.Store "Function" %0, %arg0 ["Aligned", 4] : f32 - spv.Store "Function" %0, %arg0 ["Aligned", 4] : f32 - return -} - -// ----- - -func @simple_store_missing_ptr_type(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected non-function type}} - spv.Store %0, %arg0 : f32 - return -} - -// ----- - -func @simple_store_missing_operand(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{custom op 'spv.Store' invalid operand}} : f32 - spv.Store "Function" , %arg0 : f32 - return -} - -// ----- - -func @simple_store_missing_operand(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 
{{custom op 'spv.Store' expected 2 operands}} : f32 - spv.Store "Function" %0 : f32 - return -} - -// ----- - -func @volatile_store_missing_lbrace(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ':'}} - spv.Store "Function" %0, %arg0 "Volatile"] : f32 - return -} - -// ----- - -func @volatile_store_missing_rbrace(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ']'}} - spv.Store "Function" %0, %arg0 ["Volatile"} : f32 - return -} - -// ----- - -func @aligned_store_missing_alignment(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ','}} - spv.Store "Function" %0, %arg0 ["Aligned"] : f32 - return -} - -// ----- - -func @aligned_store_missing_comma(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ','}} - spv.Store "Function" %0, %arg0 ["Aligned" 4] : f32 - return -} - -// ----- - -func @load_incorrect_attributes(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ']'}} - spv.Store "Function" %0, %arg0 ["Volatile", 4] : f32 - return -} - -// ----- - -func @aligned_store_incorrect_attributes(%arg0 : f32) -> () { - %0 = spv.Variable : !spv.ptr - // expected-error @+1 {{expected ']'}} - spv.Store "Function" %0, %arg0 ["Aligned", 4, 23] : f32 - return -} - -// ----- - -spv.module Logical GLSL450 { - spv.globalVariable @var0 : !spv.ptr - spv.func @simple_store(%arg0 : f32) -> () "None" { - %0 = spv.mlir.addressof @var0 : !spv.ptr - // CHECK: spv.Store "Input" {{%.*}}, {{%.*}} : f32 - spv.Store "Input" %0, %arg0 : f32 - spv.Return - } -} - -// ----- - -//===----------------------------------------------------------------------===// -// spv.SubgroupBallotKHR -//===----------------------------------------------------------------------===// - -func @subgroup_ballot(%predicate: i1) -> vector<4xi32> { - %0 = spv.SubgroupBallotKHR %predicate: vector<4xi32> - return %0: vector<4xi32> -} - -// 
----- - -//===----------------------------------------------------------------------===// -// spv.undef -//===----------------------------------------------------------------------===// - -func @undef() -> () { - %0 = spv.undef : f32 - %1 = spv.undef : vector<4xf32> - spv.Return -} - -// ----- - -func @undef() -> () { - // expected-error @+2{{expected non-function type}} - %0 = spv.undef : - spv.Return -} - -// ----- - -func @undef() -> () { - // expected-error @+2{{expected ':'}} - %0 = spv.undef - spv.Return -} - -// ----- - - -//===----------------------------------------------------------------------===// -// spv.Variable -//===----------------------------------------------------------------------===// - -func @variable(%arg0: f32) -> () { - // CHECK: spv.Variable : !spv.ptr - %0 = spv.Variable : !spv.ptr - return -} - -// ----- - -func @variable_init_normal_constant() -> () { - %0 = spv.constant 4.0 : f32 - // CHECK: spv.Variable init(%0) : !spv.ptr - %1 = spv.Variable init(%0) : !spv.ptr - return -} - -// ----- - -spv.module Logical GLSL450 { - spv.globalVariable @global : !spv.ptr - spv.func @variable_init_global_variable() -> () "None" { - %0 = spv.mlir.addressof @global : !spv.ptr - // CHECK: spv.Variable init({{.*}}) : !spv.ptr, Function> - %1 = spv.Variable init(%0) : !spv.ptr, Function> - spv.Return - } -} - -// ----- - -spv.module Logical GLSL450 { - spv.specConstant @sc = 42 : i32 - // CHECK-LABEL: @variable_init_spec_constant - spv.func @variable_init_spec_constant() -> () "None" { - %0 = spv.mlir.referenceof @sc : i32 - // CHECK: spv.Variable init(%0) : !spv.ptr - %1 = spv.Variable init(%0) : !spv.ptr - spv.Return - } -} - -// ----- - -func @variable_bind() -> () { - // expected-error @+1 {{cannot have 'descriptor_set' attribute (only allowed in spv.globalVariable)}} - %0 = spv.Variable bind(1, 2) : !spv.ptr - return -} - -// ----- - -func @variable_init_bind() -> () { - %0 = spv.constant 4.0 : f32 - // expected-error @+1 {{cannot have 'binding' 
attribute (only allowed in spv.globalVariable)}} - %1 = spv.Variable init(%0) {binding = 5 : i32} : !spv.ptr - return -} - -// ----- - -func @variable_builtin() -> () { - // expected-error @+1 {{cannot have 'built_in' attribute (only allowed in spv.globalVariable)}} - %1 = spv.Variable built_in("GlobalInvocationID") : !spv.ptr, Function> - return -} - -// ----- - -func @expect_ptr_result_type(%arg0: f32) -> () { - // expected-error @+1 {{expected spv.ptr type}} - %0 = spv.Variable : f32 - return -} - -// ----- - -func @variable_init(%arg0: f32) -> () { - // expected-error @+1 {{op initializer must be the result of a constant or spv.globalVariable op}} - %0 = spv.Variable init(%arg0) : !spv.ptr - return -} - -// ----- - -func @cannot_be_generic_storage_class(%arg0: f32) -> () { - // expected-error @+1 {{op can only be used to model function-level variables. Use spv.globalVariable for module-level variables}} - %0 = spv.Variable : !spv.ptr - return -} - -// ----- - -func @copy_memory_incompatible_ptrs() { - %0 = spv.Variable : !spv.ptr - %1 = spv.Variable : !spv.ptr - // expected-error @+1 {{both operands must be pointers to the same type}} - "spv.CopyMemory"(%0, %1) {} : (!spv.ptr, !spv.ptr) -> () - spv.Return -} - -// ----- - -func @copy_memory_invalid_maa() { - %0 = spv.Variable : !spv.ptr - %1 = spv.Variable : !spv.ptr - // expected-error @+1 {{missing alignment value}} - "spv.CopyMemory"(%0, %1) {memory_access=0x0002 : i32} : (!spv.ptr, !spv.ptr) -> () - spv.Return -} - -// ----- - -func @copy_memory_invalid_source_maa() { - %0 = spv.Variable : !spv.ptr - %1 = spv.Variable : !spv.ptr - // expected-error @+1 {{invalid alignment specification with non-aligned memory access specification}} - "spv.CopyMemory"(%0, %1) {source_memory_access=0x0001 : i32, memory_access=0x0002 : i32, source_alignment=8 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () - spv.Return -} - -// ----- - -func @copy_memory_invalid_source_maa2() { - %0 = spv.Variable : !spv.ptr - %1 = 
spv.Variable : !spv.ptr - // expected-error @+1 {{missing alignment value}} - "spv.CopyMemory"(%0, %1) {source_memory_access=0x0002 : i32, memory_access=0x0002 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () - spv.Return -} - -// ----- - -func @copy_memory_print_maa() { - %0 = spv.Variable : !spv.ptr - %1 = spv.Variable : !spv.ptr - - // CHECK: spv.CopyMemory "Function" %{{.*}}, "Function" %{{.*}} ["Volatile"] : f32 - "spv.CopyMemory"(%0, %1) {memory_access=0x0001 : i32} : (!spv.ptr, !spv.ptr) -> () - - // CHECK: spv.CopyMemory "Function" %{{.*}}, "Function" %{{.*}} ["Aligned", 4] : f32 - "spv.CopyMemory"(%0, %1) {memory_access=0x0002 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () - - // CHECK: spv.CopyMemory "Function" %{{.*}}, "Function" %{{.*}} ["Aligned", 4], ["Volatile"] : f32 - "spv.CopyMemory"(%0, %1) {source_memory_access=0x0001 : i32, memory_access=0x0002 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () - - // CHECK: spv.CopyMemory "Function" %{{.*}}, "Function" %{{.*}} ["Aligned", 4], ["Aligned", 8] : f32 - "spv.CopyMemory"(%0, %1) {source_memory_access=0x0002 : i32, memory_access=0x0002 : i32, source_alignment=8 : i32, alignment=4 : i32} : (!spv.ptr, !spv.ptr) -> () - - spv.Return -} diff --git a/mlir/test/Dialect/SPIRV/IR/structure-ops.mlir b/mlir/test/Dialect/SPIRV/IR/structure-ops.mlir index c3f715f06ae241..132c4b73c937ca 100644 --- a/mlir/test/Dialect/SPIRV/IR/structure-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/structure-ops.mlir @@ -213,6 +213,41 @@ spv.module Logical GLSL450 { // ----- +//===----------------------------------------------------------------------===// +// spv.ExecutionMode +//===----------------------------------------------------------------------===// + +spv.module Logical GLSL450 { + spv.func @do_nothing() -> () "None" { + spv.Return + } + spv.EntryPoint "GLCompute" @do_nothing + // CHECK: spv.ExecutionMode {{@.*}} "ContractionOff" + spv.ExecutionMode @do_nothing "ContractionOff" +} + +spv.module Logical 
GLSL450 { + spv.func @do_nothing() -> () "None" { + spv.Return + } + spv.EntryPoint "GLCompute" @do_nothing + // CHECK: spv.ExecutionMode {{@.*}} "LocalSizeHint", 3, 4, 5 + spv.ExecutionMode @do_nothing "LocalSizeHint", 3, 4, 5 +} + +// ----- + +spv.module Logical GLSL450 { + spv.func @do_nothing() -> () "None" { + spv.Return + } + spv.EntryPoint "GLCompute" @do_nothing + // expected-error @+1 {{custom op 'spv.ExecutionMode' invalid execution_mode attribute specification: "GLCompute"}} + spv.ExecutionMode @do_nothing "GLCompute", 3, 4, 5 +} + +// ----- + //===----------------------------------------------------------------------===// // spv.func //===----------------------------------------------------------------------===// diff --git a/mlir/test/Target/SPIRV/barrier.mlir b/mlir/test/Target/SPIRV/barrier-ops.mlir similarity index 100% rename from mlir/test/Target/SPIRV/barrier.mlir rename to mlir/test/Target/SPIRV/barrier-ops.mlir From dd07d60ec33969ae4f1e37500bb852563e142dfb Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Tue, 12 Jan 2021 16:28:16 +0100 Subject: [PATCH 29/86] [SLP] Add test case showing a bug when dealing with padded types We shouldn't vectorize stores of non-packed types (i.e. types that has padding between consecutive variables in a scalar layout, but being packed in a vector layout). The problem was detected as a miscompile in a downstream test case. This is a pre-commit of a test case for the fix in D94446. 
--- .../Transforms/SLPVectorizer/X86/bad_types.ll | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll index b9fd032b5662f1..93a97c3c240584 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll @@ -15,8 +15,8 @@ define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) { ; CHECK-NEXT: [[A_AND:%.*]] = and i64 [[A_CAST]], 42 ; CHECK-NEXT: [[B_AND:%.*]] = and i64 [[B_CAST]], 42 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[PTR:%.*]], i32 1 -; CHECK-NEXT: store i64 [[A_AND]], i64* [[PTR]] -; CHECK-NEXT: store i64 [[B_AND]], i64* [[GEP]] +; CHECK-NEXT: store i64 [[A_AND]], i64* [[PTR]], align 8 +; CHECK-NEXT: store i64 [[B_AND]], i64* [[GEP]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -110,3 +110,33 @@ bb1: ; preds = %entry } declare void @f(i64, i64) + +define void @test4(i32 %a, i28* %ptr) { +; Check that we do not vectorize types that are padded to a bigger ones. +; FIXME: This is not correct! See D94446. 
+; +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i28 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i28, i28* [[PTR:%.*]], i32 1 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i28, i28* [[PTR]], i32 2 +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i28, i28* [[PTR]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i28> poison, i28 [[TRUNC]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i28> [[TMP0]], i28 [[TRUNC]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i28> [[TMP1]], i28 [[TRUNC]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i28> [[TMP2]], i28 [[TRUNC]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i28* [[PTR]] to <4 x i28>* +; CHECK-NEXT: store <4 x i28> [[TMP3]], <4 x i28>* [[TMP4]], align 4 +; CHECK-NEXT: ret void +; +entry: + %trunc = trunc i32 %a to i28 + %gep1 = getelementptr i28, i28* %ptr, i32 1 + %gep2 = getelementptr i28, i28* %ptr, i32 2 + %gep3 = getelementptr i28, i28* %ptr, i32 3 + store i28 %trunc, i28* %ptr + store i28 %trunc, i28* %gep1 + store i28 %trunc, i28* %gep2 + store i28 %trunc, i28* %gep3 + ret void +} From 3f7b4ce96065eea66bf4344973173e76ec1a4255 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 12 Jan 2021 09:46:11 -0600 Subject: [PATCH 30/86] [PowerPC] Add support for embedded devices with EFPU2 PowerPC cores like e200z759n3 [1] using an efpu2 only support single precision hardware floating point instructions. The single precision instructions efs* and evfs* are identical to the spe float instructions while efd* and evfd* instructions trigger a not implemented exception. This patch introduces a new command line option -mefpu2 which leads to single-hardware / double-software code generation. 
[1] Core reference: https://www.nxp.com/files-static/32bit/doc/ref_manual/e200z759CRM.pdf Differential revision: https://reviews.llvm.org/D92935 --- clang/docs/ClangCommandLineReference.rst | 2 + clang/include/clang/Driver/Options.td | 1 + clang/lib/Basic/Targets/PPC.cpp | 6 +- clang/test/Driver/ppc-features.cpp | 3 + llvm/lib/Target/PowerPC/PPC.td | 3 + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 +- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 1 + llvm/lib/Target/PowerPC/PPCSubtarget.h | 2 + llvm/test/CodeGen/PowerPC/spe.ll | 2032 ++++++++++++------- 9 files changed, 1338 insertions(+), 716 deletions(-) diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index b46008970f578b..ac97f6fed935ac 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -3145,6 +3145,8 @@ PowerPC .. option:: -mdirect-move, -mno-direct-move +.. option:: -mefpu2 + .. option:: -mfloat128, -mno-float128 .. option:: -mfprnd, -mno-fprnd diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 35643701f97e4e..d9586e086a9cf1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3040,6 +3040,7 @@ def mpcrel: Flag<["-"], "mpcrel">, Group; def mno_pcrel: Flag<["-"], "mno-pcrel">, Group; def mspe : Flag<["-"], "mspe">, Group; def mno_spe : Flag<["-"], "mno-spe">, Group; +def mefpu2 : Flag<["-"], "mefpu2">, Group; def mabi_EQ_vec_extabi : Flag<["-"], "mabi=vec-extabi">, Group, Flags<[CC1Option]>, HelpText<"Enable the extended Altivec ABI on AIX (AIX only). 
Uses volatile and nonvolatile vector registers">; def mabi_EQ_vec_default : Flag<["-"], "mabi=vec-default">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 2be7555102f8e7..cfede6e6e756a5 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -56,7 +56,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector &Features, HasP10Vector = true; } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; - } else if (Feature == "+spe") { + } else if (Feature == "+spe" || Feature == "+efpu2") { HasSPE = true; LongDoubleWidth = LongDoubleAlign = 64; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); @@ -402,6 +402,8 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const { void PPCTargetInfo::setFeatureEnabled(llvm::StringMap &Features, StringRef Name, bool Enabled) const { if (Enabled) { + if (Name == "efpu2") + Features["spe"] = true; // If we're enabling any of the vsx based features then enable vsx and // altivec. We'll diagnose any problems later. bool FeatureHasVSX = llvm::StringSwitch(Name) @@ -425,6 +427,8 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap &Features, else Features[Name] = true; } else { + if (Name == "spe") + Features["efpu2"] = false; // If we're disabling altivec or vsx go ahead and disable all of the vsx // features. 
if ((Name == "altivec") || (Name == "vsx")) diff --git a/clang/test/Driver/ppc-features.cpp b/clang/test/Driver/ppc-features.cpp index 85060951aa1680..def96c351b34da 100644 --- a/clang/test/Driver/ppc-features.cpp +++ b/clang/test/Driver/ppc-features.cpp @@ -155,6 +155,9 @@ // CHECK-SPE: "-target-feature" "+spe" // CHECK-NOSPE: "-target-feature" "-spe" +// RUN: %clang -target powerpc %s -mefpu2 -c -### 2>&1 | FileCheck -check-prefix=CHECK-EFPU2 %s +// CHECK-EFPU2: "-target-feature" "+efpu2" + // Assembler features // RUN: %clang -target powerpc-unknown-linux-gnu %s -### -o %t.o -no-integrated-as 2>&1 | FileCheck -check-prefix=CHECK_32_BE_AS_ARGS %s // CHECK_32_BE_AS_ARGS: "-mppc" diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 2975ae161aaad0..06403f5e55a20b 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -72,6 +72,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true", "Enable SPE instructions", [FeatureHardFloat]>; +def FeatureEFPU2 : SubtargetFeature<"efpu2", "HasEFPU2", "true", + "Enable Embedded Floating-Point APU 2 instructions", + [FeatureSPE]>; def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 91a81e36f7da2c..739f2a9684d68b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -151,7 +151,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (!useSoftFloat()) { if (hasSPE()) { addRegisterClass(MVT::f32, &PPC::GPRCRegClass); - addRegisterClass(MVT::f64, &PPC::SPERCRegClass); + // EFPU2 APU only supports f32 + if (!Subtarget.hasEFPU2()) + addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, 
&PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 4bbcef13b4f126..86816fae5a55fa 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -77,6 +77,7 @@ void PPCSubtarget::initializeEnvironment() { HasHardFloat = false; HasAltivec = false; HasSPE = false; + HasEFPU2 = false; HasFPU = false; HasVSX = false; NeedsTwoConstNR = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 8f0034131bc51f..5003e12b72bc5e 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -100,6 +100,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool HasAltivec; bool HasFPU; bool HasSPE; + bool HasEFPU2; bool HasVSX; bool NeedsTwoConstNR; bool HasP8Vector; @@ -257,6 +258,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasSPE() const { return HasSPE; } + bool hasEFPU2() const { return HasEFPU2; } bool hasFPU() const { return HasFPU; } bool hasVSX() const { return HasVSX; } bool needsTwoConstNR() const { return NeedsTwoConstNR; } diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index 94a8539aeddead..59bc6abc4f759c 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1,6 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \ -; RUN: -mattr=+spe | FileCheck %s +; RUN: split-file %s %t +; RUN: llc -verify-machineinstrs < %t/single.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %t/single.ll +; RUN: llc -verify-machineinstrs < %t/double.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %t/double.ll 
-check-prefix=SPE +; RUN: llc -verify-machineinstrs < %t/hwdouble.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %t/hwdouble.ll -check-prefix=SPE +; RUN: llc -verify-machineinstrs < %t/single.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+efpu2 | FileCheck %t/single.ll +; RUN: llc -verify-machineinstrs < %t/double.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+efpu2 | FileCheck %t/double.ll -check-prefix=EFPU2 + +;--- single.ll +; single tests (identical for -mattr=+spe and -mattr=+efpu2) declare float @llvm.fabs.float(float) define float @test_float_abs(float %a) #0 { @@ -24,7 +36,7 @@ define float @test_fnabs(float %a) #0 { ret float %sub } -define float @test_fdiv(float %a, float %b) { +define float @test_fdiv(float %a, float %b) #0 { ; CHECK-LABEL: test_fdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efsdiv 3, 3, 4 @@ -35,7 +47,7 @@ entry: } -define float @test_fmul(float %a, float %b) { +define float @test_fmul(float %a, float %b) #0 { ; CHECK-LABEL: test_fmul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efsmul 3, 3, 4 @@ -45,7 +57,7 @@ define float @test_fmul(float %a, float %b) { ret float %v } -define float @test_fadd(float %a, float %b) { +define float @test_fadd(float %a, float %b) #0 { ; CHECK-LABEL: test_fadd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efsadd 3, 3, 4 @@ -55,7 +67,7 @@ define float @test_fadd(float %a, float %b) { ret float %v } -define float @test_fsub(float %a, float %b) { +define float @test_fsub(float %a, float %b) #0 { ; CHECK-LABEL: test_fsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efssub 3, 3, 4 @@ -65,7 +77,7 @@ define float @test_fsub(float %a, float %b) { ret float %v } -define float @test_fneg(float %a) { +define float @test_fneg(float %a) #0 { ; CHECK-LABEL: test_fneg: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efsneg 3, 3 @@ -75,30 +87,18 @@ define float @test_fneg(float %a) { ret float %v } -define float @test_dtos(double %a) { -; CHECK-LABEL: test_dtos: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efscfd 3, 3 -; CHECK-NEXT: blr - entry: - %v = fptrunc double %a to float - ret float %v -} - -define i32 @test_fcmpgt(float %a, float %b) { +define i32 @test_fcmpgt(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpgt: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpgt 0, 3, 4 -; CHECK-NEXT: ble 0, .LBB8_2 +; CHECK-NEXT: ble 0, .LBB7_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB8_3 -; CHECK-NEXT: .LBB8_2: # %fa +; CHECK-NEXT: b .LBB7_3 +; CHECK-NEXT: .LBB7_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB8_3: # %ret +; CHECK-NEXT: .LBB7_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -118,25 +118,24 @@ ret: ret i32 %0 } -define i32 @test_fcmpugt(float %a, float %b) { +define i32 @test_fcmpugt(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpugt: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB9_4 +; CHECK-NEXT: bc 4, 1, .LBB8_4 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: efscmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB9_4 +; CHECK-NEXT: bc 4, 1, .LBB8_4 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: efscmpgt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB9_4 +; CHECK-NEXT: bc 12, 1, .LBB8_4 ; CHECK-NEXT: # %bb.3: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB9_5 -; CHECK-NEXT: .LBB9_4: # %tr +; CHECK-NEXT: b .LBB8_5 +; CHECK-NEXT: .LBB8_4: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB9_5: # %ret +; CHECK-NEXT: .LBB8_5: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -156,25 +155,24 @@ ret: ret i32 %0 } -define i32 @test_fcmple(float %a, float %b) { +define i32 @test_fcmple(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmple: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 
0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB10_4 +; CHECK-NEXT: bc 4, 1, .LBB9_4 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: efscmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB10_4 +; CHECK-NEXT: bc 4, 1, .LBB9_4 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: efscmpgt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB10_4 +; CHECK-NEXT: bc 12, 1, .LBB9_4 ; CHECK-NEXT: # %bb.3: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB10_5 -; CHECK-NEXT: .LBB10_4: # %fa +; CHECK-NEXT: b .LBB9_5 +; CHECK-NEXT: .LBB9_4: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB10_5: # %ret +; CHECK-NEXT: .LBB9_5: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -194,19 +192,18 @@ ret: ret i32 %0 } -define i32 @test_fcmpule(float %a, float %b) { +define i32 @test_fcmpule(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpule: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpgt 0, 3, 4 -; CHECK-NEXT: bgt 0, .LBB11_2 +; CHECK-NEXT: bgt 0, .LBB10_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB11_3 -; CHECK-NEXT: .LBB11_2: # %fa +; CHECK-NEXT: b .LBB10_3 +; CHECK-NEXT: .LBB10_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB11_3: # %ret +; CHECK-NEXT: .LBB10_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -227,19 +224,18 @@ ret: } ; The type of comparison found in C's if (x == y) -define i32 @test_fcmpeq(float %a, float %b) { +define i32 @test_fcmpeq(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpeq: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 3, 4 -; CHECK-NEXT: ble 0, .LBB12_2 +; CHECK-NEXT: ble 0, .LBB11_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB12_3 -; CHECK-NEXT: .LBB12_2: # %fa +; CHECK-NEXT: b .LBB11_3 +; CHECK-NEXT: .LBB11_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB12_3: # %ret +; CHECK-NEXT: .LBB11_3: # %ret ; 
CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -260,18 +256,18 @@ ret: } ; (un)ordered tests are expanded to une and oeq so verify -define i1 @test_fcmpuno(float %a, float %b) { +define i1 @test_fcmpuno(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpuno: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efscmpeq 0, 3, 3 ; CHECK-NEXT: efscmpeq 1, 4, 4 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crand 20, 5, 1 -; CHECK-NEXT: bc 12, 20, .LBB13_2 +; CHECK-NEXT: bc 12, 20, .LBB12_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB13_2: # %entry +; CHECK-NEXT: .LBB12_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -279,18 +275,18 @@ define i1 @test_fcmpuno(float %a, float %b) { ret i1 %r } -define i1 @test_fcmpord(float %a, float %b) { +define i1 @test_fcmpord(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpord: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efscmpeq 0, 4, 4 ; CHECK-NEXT: efscmpeq 1, 3, 3 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crnand 20, 5, 1 -; CHECK-NEXT: bc 12, 20, .LBB14_2 +; CHECK-NEXT: bc 12, 20, .LBB13_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB14_2: # %entry +; CHECK-NEXT: .LBB13_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -298,7 +294,7 @@ define i1 @test_fcmpord(float %a, float %b) { ret i1 %r } -define i1 @test_fcmpueq(float %a, float %b) { +define i1 @test_fcmpueq(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpueq: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efscmpeq 0, 3, 3 @@ -307,11 +303,11 @@ define i1 @test_fcmpueq(float %a, float %b) { ; CHECK-NEXT: efscmpeq 0, 3, 4 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crnor 20, 1, 20 -; CHECK-NEXT: bc 12, 20, .LBB15_2 +; CHECK-NEXT: bc 12, 20, .LBB14_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB15_2: # %entry +; CHECK-NEXT: .LBB14_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -319,7 +315,7 @@ 
define i1 @test_fcmpueq(float %a, float %b) { ret i1 %r } -define i1 @test_fcmpne(float %a, float %b) { +define i1 @test_fcmpne(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpne: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efscmpeq 0, 4, 4 @@ -328,11 +324,11 @@ define i1 @test_fcmpne(float %a, float %b) { ; CHECK-NEXT: efscmpeq 0, 3, 4 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crorc 20, 1, 20 -; CHECK-NEXT: bc 12, 20, .LBB16_2 +; CHECK-NEXT: bc 12, 20, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB16_2: # %entry +; CHECK-NEXT: .LBB15_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -340,19 +336,18 @@ define i1 @test_fcmpne(float %a, float %b) { ret i1 %r } -define i32 @test_fcmpune(float %a, float %b) { +define i32 @test_fcmpune(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpune: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 3, 4 -; CHECK-NEXT: bgt 0, .LBB17_2 +; CHECK-NEXT: bgt 0, .LBB16_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB17_3 -; CHECK-NEXT: .LBB17_2: # %fa +; CHECK-NEXT: b .LBB16_3 +; CHECK-NEXT: .LBB16_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB17_3: # %ret +; CHECK-NEXT: .LBB16_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -372,19 +367,18 @@ ret: ret i32 %0 } -define i32 @test_fcmplt(float %a, float %b) { +define i32 @test_fcmplt(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmplt: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmplt 0, 3, 4 -; CHECK-NEXT: ble 0, .LBB18_2 +; CHECK-NEXT: ble 0, .LBB17_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB18_3 -; CHECK-NEXT: .LBB18_2: # %fa +; CHECK-NEXT: b .LBB17_3 +; CHECK-NEXT: .LBB17_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB18_3: # %ret +; CHECK-NEXT: .LBB17_3: # %ret ; CHECK-NEXT: stw 
3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -404,7 +398,7 @@ ret: ret i32 %0 } -define i1 @test_fcmpult(float %a, float %b) { +define i1 @test_fcmpult(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpult: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: efscmpeq 0, 3, 3 @@ -413,11 +407,11 @@ define i1 @test_fcmpult(float %a, float %b) { ; CHECK-NEXT: efscmplt 0, 3, 4 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crnor 20, 1, 20 -; CHECK-NEXT: bc 12, 20, .LBB19_2 +; CHECK-NEXT: bc 12, 20, .LBB18_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB19_2: # %entry +; CHECK-NEXT: .LBB18_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -425,25 +419,24 @@ define i1 @test_fcmpult(float %a, float %b) { ret i1 %r } -define i32 @test_fcmpge(float %a, float %b) { +define i32 @test_fcmpge(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpge: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB20_4 +; CHECK-NEXT: bc 4, 1, .LBB19_4 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: efscmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB20_4 +; CHECK-NEXT: bc 4, 1, .LBB19_4 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: efscmplt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB20_4 +; CHECK-NEXT: bc 12, 1, .LBB19_4 ; CHECK-NEXT: # %bb.3: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB20_5 -; CHECK-NEXT: .LBB20_4: # %fa +; CHECK-NEXT: b .LBB19_5 +; CHECK-NEXT: .LBB19_4: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB20_5: # %ret +; CHECK-NEXT: .LBB19_5: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -463,19 +456,18 @@ ret: ret i32 %0 } -define i32 @test_fcmpuge(float %a, float %b) { +define i32 @test_fcmpuge(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpuge: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmplt 0, 3, 4 -; CHECK-NEXT: bgt 
0, .LBB21_2 +; CHECK-NEXT: bgt 0, .LBB20_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB21_3 -; CHECK-NEXT: .LBB21_2: # %fa +; CHECK-NEXT: b .LBB20_3 +; CHECK-NEXT: .LBB20_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB21_3: # %ret +; CHECK-NEXT: .LBB20_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -496,7 +488,7 @@ ret: } -define i32 @test_ftoui(float %a) { +define i32 @test_ftoui(float %a) #0 { ; CHECK-LABEL: test_ftoui: ; CHECK: # %bb.0: ; CHECK-NEXT: efsctuiz 3, 3 @@ -505,7 +497,7 @@ define i32 @test_ftoui(float %a) { ret i32 %v } -define i32 @test_ftosi(float %a) { +define i32 @test_ftosi(float %a) #0 { ; CHECK-LABEL: test_ftosi: ; CHECK: # %bb.0: ; CHECK-NEXT: efsctsiz 3, 3 @@ -514,7 +506,7 @@ define i32 @test_ftosi(float %a) { ret i32 %v } -define float @test_ffromui(i32 %a) { +define float @test_ffromui(i32 %a) #0 { ; CHECK-LABEL: test_ffromui: ; CHECK: # %bb.0: ; CHECK-NEXT: efscfui 3, 3 @@ -523,7 +515,7 @@ define float @test_ffromui(i32 %a) { ret float %v } -define float @test_ffromsi(i32 %a) { +define float @test_ffromsi(i32 %a) #0 { ; CHECK-LABEL: test_ffromsi: ; CHECK: # %bb.0: ; CHECK-NEXT: efscfsi 3, 3 @@ -532,11 +524,10 @@ define float @test_ffromsi(i32 %a) { ret float %v } -define i32 @test_fasmconst(float %x) { +define i32 @test_fasmconst(float %x) #0 { ; CHECK-LABEL: test_fasmconst: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -32(1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: stw 3, 20(1) ; CHECK-NEXT: stw 3, 24(1) ; CHECK-NEXT: lwz 3, 20(1) @@ -553,16 +544,48 @@ entry: ret i32 %1 ; Check that it's not loading a double } +attributes #0 = { nounwind } +;--- double.ll ; Double tests +; results depend on -mattr=+spe or -mattr=+efpu2 + +define float @test_dtos(double %a) #0 { +; SPE-LABEL: test_dtos: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efscfd 3, 3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dtos: +; EFPU2: # %bb.0: # %entry +; 
EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __truncdfsf2 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr + entry: + %v = fptrunc double %a to float + ret float %v +} define void @test_double_abs(double * %aa) #0 { -; CHECK-LABEL: test_double_abs: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evldd 4, 0(3) -; CHECK-NEXT: efdabs 4, 4 -; CHECK-NEXT: evstdd 4, 0(3) -; CHECK-NEXT: blr +; SPE-LABEL: test_double_abs: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evldd 4, 0(3) +; SPE-NEXT: efdabs 4, 4 +; SPE-NEXT: evstdd 4, 0(3) +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_double_abs: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: lwz 4, 0(3) +; EFPU2-NEXT: clrlwi 4, 4, 1 +; EFPU2-NEXT: stw 4, 0(3) +; EFPU2-NEXT: blr entry: %0 = load double, double * %aa %1 = tail call double @llvm.fabs.f64(double %0) #2 @@ -574,12 +597,19 @@ define void @test_double_abs(double * %aa) #0 { declare double @llvm.fabs.f64(double) #1 define void @test_dnabs(double * %aa) #0 { -; CHECK-LABEL: test_dnabs: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evldd 4, 0(3) -; CHECK-NEXT: efdnabs 4, 4 -; CHECK-NEXT: evstdd 4, 0(3) -; CHECK-NEXT: blr +; SPE-LABEL: test_dnabs: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evldd 4, 0(3) +; SPE-NEXT: efdnabs 4, 4 +; SPE-NEXT: evstdd 4, 0(3) +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dnabs: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: lwz 4, 0(3) +; EFPU2-NEXT: oris 4, 4, 32768 +; EFPU2-NEXT: stw 4, 0(3) +; EFPU2-NEXT: blr entry: %0 = load double, double * %aa %1 = tail call double @llvm.fabs.f64(double %0) #2 @@ -588,156 +618,263 @@ define void @test_dnabs(double * %aa) #0 { ret void } -define double @test_ddiv(double %a, double %b) { -; CHECK-LABEL: test_ddiv: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efddiv 4, 3, 5 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: 
def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_ddiv(double %a, double %b) #0 { +; SPE-LABEL: test_ddiv: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efddiv 4, 3, 5 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_ddiv: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __divdf3 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = fdiv double %a, %b ret double %v } -define double @test_dmul(double %a, double %b) { -; CHECK-LABEL: test_dmul: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdmul 4, 3, 5 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_dmul(double %a, double %b) #0 { +; SPE-LABEL: test_dmul: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdmul 4, 3, 5 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dmul: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __muldf3 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = fmul double %a, %b ret double %v } -define double @test_dadd(double %a, double %b) { -; CHECK-LABEL: test_dadd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdadd 4, 3, 5 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed 
$s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_dadd(double %a, double %b) #0 { +; SPE-LABEL: test_dadd: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdadd 4, 3, 5 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dadd: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __adddf3 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = fadd double %a, %b ret double %v } -define double @test_dsub(double %a, double %b) { -; CHECK-LABEL: test_dsub: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdsub 4, 3, 5 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_dsub(double %a, double %b) #0 { +; SPE-LABEL: test_dsub: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdsub 4, 3, 5 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dsub: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __subdf3 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = fsub double %a, %b ret double %v } -define double @test_dneg(double %a) { -; CHECK-LABEL: test_dneg: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdneg 4, 3 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # 
kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_dneg(double %a) #0 { +; SPE-LABEL: test_dneg: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdneg 4, 3 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dneg: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: xoris 3, 3, 32768 +; EFPU2-NEXT: blr entry: %v = fsub double -0.0, %a ret double %v } -define double @test_stod(float %a) { -; CHECK-LABEL: test_stod: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efdcfs 4, 3 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_stod(float %a) #0 { +; SPE-LABEL: test_stod: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfs 4, 3 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_stod: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __extendsfdf2 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = fpext float %a to double ret double %v } ; (un)ordered tests are expanded to une and oeq so verify -define i1 @test_dcmpuno(double %a, double %b) { -; CHECK-LABEL: test_dcmpuno: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: efdcmpeq 0, 3, 3 -; CHECK-NEXT: efdcmpeq 1, 5, 5 -; CHECK-NEXT: crand 20, 5, 1 -; CHECK-NEXT: bc 12, 20, .LBB35_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ori 3, 7, 0 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB35_2: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +define i1 @test_dcmpuno(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpuno: +; 
SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: li 7, 1 +; SPE-NEXT: efdcmpeq 0, 3, 3 +; SPE-NEXT: efdcmpeq 1, 5, 5 +; SPE-NEXT: crand 20, 5, 1 +; SPE-NEXT: bc 12, 20, .LBB9_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: ori 3, 7, 0 +; SPE-NEXT: blr +; SPE-NEXT: .LBB9_2: # %entry +; SPE-NEXT: li 3, 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpuno: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __unorddf2 +; EFPU2-NEXT: cntlzw 3, 3 +; EFPU2-NEXT: not 3, 3 +; EFPU2-NEXT: rlwinm 3, 3, 27, 31, 31 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = fcmp uno double %a, %b ret i1 %r } -define i1 @test_dcmpord(double %a, double %b) { -; CHECK-LABEL: test_dcmpord: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: efdcmpeq 1, 3, 3 -; CHECK-NEXT: crnand 20, 5, 1 -; CHECK-NEXT: bc 12, 20, .LBB36_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ori 3, 7, 0 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB36_2: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +define i1 @test_dcmpord(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpord: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: li 7, 1 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: efdcmpeq 1, 3, 3 +; SPE-NEXT: crnand 20, 5, 1 +; SPE-NEXT: bc 12, 20, .LBB10_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: ori 3, 7, 0 +; SPE-NEXT: blr +; SPE-NEXT: .LBB10_2: # %entry +; SPE-NEXT: li 3, 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpord: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __unorddf2 +; EFPU2-NEXT: cntlzw 3, 3 +; EFPU2-NEXT: rlwinm 3, 3, 27, 31, 31 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 
+; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = fcmp ord double %a, %b ret i1 %r } -define i32 @test_dcmpgt(double %a, double %b) { -; CHECK-LABEL: test_dcmpgt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpgt 0, 3, 5 -; CHECK-NEXT: ble 0, .LBB37_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB37_3 -; CHECK-NEXT: .LBB37_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB37_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmpgt(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpgt: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpgt 0, 3, 5 +; SPE-NEXT: ble 0, .LBB11_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB11_3 +; SPE-NEXT: .LBB11_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB11_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpgt: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __gtdf2 +; EFPU2-NEXT: cmpwi 3, 1 +; EFPU2-NEXT: blt 0, .LBB11_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB11_3 +; EFPU2-NEXT: .LBB11_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB11_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp ogt double %a, %b @@ -753,31 +890,51 @@ ret: ret i32 %0 } -define i32 @test_dcmpugt(double %a, double %b) { -; CHECK-LABEL: test_dcmpugt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: 
evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB38_4 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: efdcmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB38_4 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: efdcmpgt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB38_4 -; CHECK-NEXT: # %bb.3: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB38_5 -; CHECK-NEXT: .LBB38_4: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB38_5: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmpugt(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpugt: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: bc 4, 1, .LBB12_4 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: efdcmpeq 0, 3, 3 +; SPE-NEXT: bc 4, 1, .LBB12_4 +; SPE-NEXT: # %bb.2: # %entry +; SPE-NEXT: efdcmpgt 0, 3, 4 +; SPE-NEXT: bc 12, 1, .LBB12_4 +; SPE-NEXT: # %bb.3: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: b .LBB12_5 +; SPE-NEXT: .LBB12_4: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: .LBB12_5: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpugt: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __ledf2 +; EFPU2-NEXT: cmpwi 3, 1 +; EFPU2-NEXT: blt 0, .LBB12_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB12_3 +; EFPU2-NEXT: .LBB12_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB12_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp ugt double %a, %b @@ -793,25 +950,45 @@ ret: ret i32 %0 } -define i32 @test_dcmple(double %a, double %b) { -; CHECK-LABEL: test_dcmple: -; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpgt 0, 3, 5 -; CHECK-NEXT: bgt 0, .LBB39_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB39_3 -; CHECK-NEXT: .LBB39_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB39_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmple(double %a, double %b) #0 { +; SPE-LABEL: test_dcmple: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpgt 0, 3, 5 +; SPE-NEXT: bgt 0, .LBB13_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB13_3 +; SPE-NEXT: .LBB13_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB13_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmple: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __gtdf2 +; EFPU2-NEXT: cmpwi 3, 0 +; EFPU2-NEXT: bgt 0, .LBB13_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB13_3 +; EFPU2-NEXT: .LBB13_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB13_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp ule double %a, %b @@ -827,25 +1004,45 @@ ret: ret i32 %0 } -define i32 @test_dcmpule(double %a, double %b) { -; CHECK-LABEL: test_dcmpule: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpgt 0, 3, 5 -; CHECK-NEXT: bgt 0, .LBB40_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; 
CHECK-NEXT: b .LBB40_3 -; CHECK-NEXT: .LBB40_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB40_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmpule(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpule: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpgt 0, 3, 5 +; SPE-NEXT: bgt 0, .LBB14_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB14_3 +; SPE-NEXT: .LBB14_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB14_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpule: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __gtdf2 +; EFPU2-NEXT: cmpwi 3, 0 +; EFPU2-NEXT: bgt 0, .LBB14_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB14_3 +; EFPU2-NEXT: .LBB14_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB14_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp ule double %a, %b @@ -862,25 +1059,45 @@ ret: } ; The type of comparison found in C's if (x == y) -define i32 @test_dcmpeq(double %a, double %b) { -; CHECK-LABEL: test_dcmpeq: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpeq 0, 3, 5 -; CHECK-NEXT: ble 0, .LBB41_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB41_3 -; CHECK-NEXT: .LBB41_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB41_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 
@test_dcmpeq(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpeq: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpeq 0, 3, 5 +; SPE-NEXT: ble 0, .LBB15_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB15_3 +; SPE-NEXT: .LBB15_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB15_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpeq: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __nedf2 +; EFPU2-NEXT: cmplwi 3, 0 +; EFPU2-NEXT: bne 0, .LBB15_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB15_3 +; EFPU2-NEXT: .LBB15_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB15_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp oeq double %a, %b @@ -896,31 +1113,83 @@ ret: ret i32 %0 } -define i32 @test_dcmpueq(double %a, double %b) { -; CHECK-LABEL: test_dcmpueq: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB42_4 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: efdcmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB42_4 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: efdcmpeq 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB42_4 -; CHECK-NEXT: # %bb.3: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB42_5 -; CHECK-NEXT: .LBB42_4: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB42_5: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmpueq(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpueq: +; SPE: # 
%bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: bc 4, 1, .LBB16_4 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: efdcmpeq 0, 3, 3 +; SPE-NEXT: bc 4, 1, .LBB16_4 +; SPE-NEXT: # %bb.2: # %entry +; SPE-NEXT: efdcmpeq 0, 3, 4 +; SPE-NEXT: bc 12, 1, .LBB16_4 +; SPE-NEXT: # %bb.3: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: b .LBB16_5 +; SPE-NEXT: .LBB16_4: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: .LBB16_5: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpueq: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -96(1) +; EFPU2-NEXT: mfcr 12 +; EFPU2-NEXT: stw 27, 76(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 28, 80(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 29, 84(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 88(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 12, 72(1) +; EFPU2-NEXT: evstdd 27, 24(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 27, 3 +; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 28, 4 +; EFPU2-NEXT: evstdd 29, 40(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 29, 5 +; EFPU2-NEXT: evstdd 30, 48(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 30, 6 +; EFPU2-NEXT: bl __eqdf2 +; EFPU2-NEXT: cmpwi 2, 3, 0 +; EFPU2-NEXT: mr 3, 27 +; EFPU2-NEXT: mr 4, 28 +; EFPU2-NEXT: mr 5, 29 +; EFPU2-NEXT: mr 6, 30 +; EFPU2-NEXT: bl __unorddf2 +; EFPU2-NEXT: bc 12, 10, .LBB16_3 +; EFPU2-NEXT: # %bb.1: # %entry +; EFPU2-NEXT: cmpwi 3, 0 +; EFPU2-NEXT: bc 4, 2, .LBB16_3 +; EFPU2-NEXT: # %bb.2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: b .LBB16_4 +; EFPU2-NEXT: .LBB16_3: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: .LBB16_4: # %ret +; EFPU2-NEXT: stw 3, 20(1) +; EFPU2-NEXT: lwz 3, 20(1) +; EFPU2-NEXT: evldd 30, 48(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 29, 40(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload 
+; EFPU2-NEXT: lwz 12, 72(1) +; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload +; EFPU2-NEXT: mtcrf 32, 12 # cr2 +; EFPU2-NEXT: lwz 30, 88(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 84(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 80(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 27, 76(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 100(1) +; EFPU2-NEXT: addi 1, 1, 96 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp ueq double %a, %b @@ -936,48 +1205,119 @@ ret: ret i32 %0 } -define i1 @test_dcmpne(double %a, double %b) { -; CHECK-LABEL: test_dcmpne: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: efdcmpeq 1, 3, 3 -; CHECK-NEXT: efdcmpeq 5, 3, 4 -; CHECK-NEXT: crand 24, 5, 1 -; CHECK-NEXT: crorc 20, 21, 24 -; CHECK-NEXT: bc 12, 20, .LBB43_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ori 3, 7, 0 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB43_2: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +define i1 @test_dcmpne(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpne: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: li 7, 1 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: efdcmpeq 1, 3, 3 +; SPE-NEXT: efdcmpeq 5, 3, 4 +; SPE-NEXT: crand 24, 5, 1 +; SPE-NEXT: crorc 20, 21, 24 +; SPE-NEXT: bc 12, 20, .LBB17_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: ori 3, 7, 0 +; SPE-NEXT: blr +; SPE-NEXT: .LBB17_2: # %entry +; SPE-NEXT: li 3, 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpne: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -96(1) +; EFPU2-NEXT: mfcr 12 +; EFPU2-NEXT: stw 27, 76(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 28, 80(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 29, 84(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 88(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 12, 72(1) +; EFPU2-NEXT: evstdd 27, 
24(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 27, 3 +; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 28, 4 +; EFPU2-NEXT: evstdd 29, 40(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 29, 5 +; EFPU2-NEXT: evstdd 30, 48(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 30, 6 +; EFPU2-NEXT: bl __unorddf2 +; EFPU2-NEXT: cmpwi 2, 3, 0 +; EFPU2-NEXT: mr 3, 27 +; EFPU2-NEXT: mr 4, 28 +; EFPU2-NEXT: mr 5, 29 +; EFPU2-NEXT: mr 6, 30 +; EFPU2-NEXT: bl __eqdf2 +; EFPU2-NEXT: evldd 30, 48(1) # 8-byte Folded Reload +; EFPU2-NEXT: cmpwi 3, 0 +; EFPU2-NEXT: evldd 29, 40(1) # 8-byte Folded Reload +; EFPU2-NEXT: li 4, 1 +; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload +; EFPU2-NEXT: crorc 20, 2, 10 +; EFPU2-NEXT: lwz 12, 72(1) +; EFPU2-NEXT: bc 12, 20, .LBB17_2 +; EFPU2-NEXT: # %bb.1: # %entry +; EFPU2-NEXT: ori 3, 4, 0 +; EFPU2-NEXT: b .LBB17_3 +; EFPU2-NEXT: .LBB17_2: # %entry +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB17_3: # %entry +; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload +; EFPU2-NEXT: mtcrf 32, 12 # cr2 +; EFPU2-NEXT: lwz 30, 88(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 84(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 80(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 27, 76(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 100(1) +; EFPU2-NEXT: addi 1, 1, 96 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = fcmp one double %a, %b ret i1 %r } -define i32 @test_dcmpune(double %a, double %b) { -; CHECK-LABEL: test_dcmpune: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpeq 0, 3, 5 -; CHECK-NEXT: bgt 0, .LBB44_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB44_3 -; CHECK-NEXT: .LBB44_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB44_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmpune(double 
%a, double %b) #0 { +; SPE-LABEL: test_dcmpune: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpeq 0, 3, 5 +; SPE-NEXT: bgt 0, .LBB18_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB18_3 +; SPE-NEXT: .LBB18_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB18_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpune: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __eqdf2 +; EFPU2-NEXT: cmplwi 3, 0 +; EFPU2-NEXT: beq 0, .LBB18_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB18_3 +; EFPU2-NEXT: .LBB18_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB18_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp une double %a, %b @@ -993,25 +1333,45 @@ ret: ret i32 %0 } -define i32 @test_dcmplt(double %a, double %b) { -; CHECK-LABEL: test_dcmplt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmplt 0, 3, 5 -; CHECK-NEXT: ble 0, .LBB45_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB45_3 -; CHECK-NEXT: .LBB45_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB45_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmplt(double %a, double %b) #0 { +; SPE-LABEL: test_dcmplt: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmplt 0, 3, 5 +; SPE-NEXT: ble 0, .LBB19_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; 
SPE-NEXT: b .LBB19_3 +; SPE-NEXT: .LBB19_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB19_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmplt: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __ltdf2 +; EFPU2-NEXT: cmpwi 3, -1 +; EFPU2-NEXT: bgt 0, .LBB19_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB19_3 +; EFPU2-NEXT: .LBB19_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB19_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp olt double %a, %b @@ -1027,31 +1387,51 @@ ret: ret i32 %0 } -define i32 @test_dcmpult(double %a, double %b) { -; CHECK-LABEL: test_dcmpult: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB46_4 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: efdcmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB46_4 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: efdcmplt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB46_4 -; CHECK-NEXT: # %bb.3: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB46_5 -; CHECK-NEXT: .LBB46_4: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB46_5: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmpult(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpult: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: bc 4, 1, .LBB20_4 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: efdcmpeq 0, 3, 3 +; SPE-NEXT: bc 4, 1, .LBB20_4 +; SPE-NEXT: # %bb.2: # %entry +; 
SPE-NEXT: efdcmplt 0, 3, 4 +; SPE-NEXT: bc 12, 1, .LBB20_4 +; SPE-NEXT: # %bb.3: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: b .LBB20_5 +; SPE-NEXT: .LBB20_4: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: .LBB20_5: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpult: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __gedf2 +; EFPU2-NEXT: cmpwi 3, -1 +; EFPU2-NEXT: bgt 0, .LBB20_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB20_3 +; EFPU2-NEXT: .LBB20_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB20_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = alloca i32, align 4 %c = fcmp ult double %a, %b @@ -1067,48 +1447,81 @@ ret: ret i32 %0 } -define i1 @test_dcmpge(double %a, double %b) { -; CHECK-LABEL: test_dcmpge: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: efdcmpeq 1, 3, 3 -; CHECK-NEXT: efdcmplt 5, 3, 4 -; CHECK-NEXT: crand 24, 5, 1 -; CHECK-NEXT: crorc 20, 21, 24 -; CHECK-NEXT: bc 12, 20, .LBB47_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ori 3, 7, 0 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB47_2: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +define i1 @test_dcmpge(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpge: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: li 7, 1 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: efdcmpeq 1, 3, 3 +; SPE-NEXT: efdcmplt 5, 3, 4 +; SPE-NEXT: crand 24, 5, 1 +; SPE-NEXT: crorc 20, 21, 24 +; SPE-NEXT: bc 12, 20, .LBB21_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: ori 3, 7, 0 +; SPE-NEXT: blr +; SPE-NEXT: .LBB21_2: # %entry +; SPE-NEXT: li 3, 0 +; 
SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpge: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __gedf2 +; EFPU2-NEXT: not 3, 3 +; EFPU2-NEXT: srwi 3, 3, 31 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %r = fcmp oge double %a, %b ret i1 %r } -define i32 @test_dcmpuge(double %a, double %b) { -; CHECK-LABEL: test_dcmpuge: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmplt 0, 3, 5 -; CHECK-NEXT: bgt 0, .LBB48_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB48_3 -; CHECK-NEXT: .LBB48_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB48_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +define i32 @test_dcmpuge(double %a, double %b) #0 { +; SPE-LABEL: test_dcmpuge: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmplt 0, 3, 5 +; SPE-NEXT: bgt 0, .LBB22_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB22_3 +; SPE-NEXT: .LBB22_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB22_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpuge: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __ltdf2 +; EFPU2-NEXT: cmpwi 3, 0 +; EFPU2-NEXT: blt 0, .LBB22_2 +; EFPU2-NEXT: # %bb.1: # %tr +; EFPU2-NEXT: li 3, 1 +; EFPU2-NEXT: b .LBB22_3 +; EFPU2-NEXT: .LBB22_2: # %fa +; EFPU2-NEXT: li 3, 0 +; EFPU2-NEXT: .LBB22_3: # %ret +; EFPU2-NEXT: stw 3, 12(1) +; EFPU2-NEXT: lwz 3, 12(1) +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr 
entry: %r = alloca i32, align 4 %c = fcmp uge double %a, %b @@ -1124,219 +1537,310 @@ ret: ret i32 %0 } -define double @test_dselect(double %a, double %b, i1 %c) { -; CHECK-LABEL: test_dselect: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. 7, 7, 1 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 4, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB49_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: evor 4, 5, 5 -; CHECK-NEXT: .LBB49_2: # %entry -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_dselect(double %a, double %b, i1 %c) #0 { +; SPE-LABEL: test_dselect: +; SPE: # %bb.0: # %entry +; SPE-NEXT: andi. 7, 7, 1 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 4, 3, 4 +; SPE-NEXT: bc 12, 1, .LBB23_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: evor 4, 5, 5 +; SPE-NEXT: .LBB23_2: # %entry +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dselect: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: andi. 
7, 7, 1 +; EFPU2-NEXT: bclr 12, 1, 0 +; EFPU2-NEXT: # %bb.1: # %entry +; EFPU2-NEXT: ori 3, 5, 0 +; EFPU2-NEXT: ori 4, 6, 0 +; EFPU2-NEXT: blr entry: %r = select i1 %c, double %a, double %b ret double %r } -define i32 @test_dtoui(double %a) { -; CHECK-LABEL: test_dtoui: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdctuiz 3, 3 -; CHECK-NEXT: blr +define i32 @test_dtoui(double %a) #0 { +; SPE-LABEL: test_dtoui: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdctuiz 3, 3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dtoui: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __fixunsdfsi +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = fptoui double %a to i32 ret i32 %v } -define i32 @test_dtosi(double %a) { -; CHECK-LABEL: test_dtosi: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdctsiz 3, 3 -; CHECK-NEXT: blr +define i32 @test_dtosi(double %a) #0 { +; SPE-LABEL: test_dtosi: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdctsiz 3, 3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dtosi: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __fixdfsi +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = fptosi double %a to i32 ret i32 %v } -define double @test_dfromui(i32 %a) { -; CHECK-LABEL: test_dfromui: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efdcfui 4, 3 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_dfromui(i32 %a) #0 { +; SPE-LABEL: test_dfromui: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfui 4, 3 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed 
$s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dfromui: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __floatunsidf +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = uitofp i32 %a to double ret double %v } -define double @test_dfromsi(i32 %a) { -; CHECK-LABEL: test_dfromsi: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efdcfsi 4, 3 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +define double @test_dfromsi(i32 %a) #0 { +; SPE-LABEL: test_dfromsi: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfsi 4, 3 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dfromsi: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: bl __floatsidf +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v = sitofp i32 %a to double ret double %v } -define i32 @test_dasmconst(double %x) { -; CHECK-LABEL: test_dasmconst: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evstdd 3, 8(1) -; CHECK-NEXT: #APP -; CHECK-NEXT: efdctsi 3, 3 -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr -entry: - %x.addr = alloca double, align 8 - store double %x, double* %x.addr, align 8 - %0 = load double, double* %x.addr, align 8 - %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0) - ret i32 %1 -} - declare double @test_spill_spe_regs(double, double); define dso_local void @test_func2() #0 { -; CHECK-LABEL: test_func2: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: blr +; SPE-LABEL: test_func2: +; SPE: # %bb.0: # %entry +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_func2: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: blr entry: ret void } declare void @test_memset(i8* nocapture writeonly, i8, i32, i1) @global_var1 = global i32 0, align 4 -define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) nounwind { -; CHECK-LABEL: test_spill: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) -; CHECK-NEXT: stwu 1, -352(1) -; CHECK-NEXT: li 5, 256 -; CHECK-NEXT: evstddx 30, 1, 5 # 8-byte Folded Spill -; CHECK-NEXT: li 5, 264 -; CHECK-NEXT: evstddx 31, 1, 5 # 8-byte Folded Spill -; CHECK-NEXT: li 5, .LCPI56_0@l -; CHECK-NEXT: lis 6, .LCPI56_0@ha -; CHECK-NEXT: evlddx 5, 6, 5 -; CHECK-NEXT: stw 14, 280(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 15, 284(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 16, 288(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 17, 292(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 18, 296(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 19, 300(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 20, 304(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 21, 308(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 22, 312(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 23, 316(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 24, 320(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 25, 324(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 26, 328(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 27, 332(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 28, 336(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 29, 340(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 344(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 31, 348(1) # 4-byte Folded Spill -; CHECK-NEXT: evstdd 14, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 15, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 16, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 17, 152(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 18, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: 
evstdd 19, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 20, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 21, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 22, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 23, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 24, 208(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 25, 216(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 26, 224(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 27, 232(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 28, 240(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 29, 248(1) # 8-byte Folded Spill -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: lwz 4, 360(1) -; CHECK-NEXT: efdadd 3, 3, 3 -; CHECK-NEXT: efdadd 3, 3, 5 -; CHECK-NEXT: evstdd 3, 24(1) # 8-byte Folded Spill -; CHECK-NEXT: stw 4, 20(1) # 4-byte Folded Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addi 3, 1, 76 -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: li 5, 24 -; CHECK-NEXT: li 6, 1 -; CHECK-NEXT: li 30, 0 -; CHECK-NEXT: bl test_memset -; CHECK-NEXT: lwz 3, 20(1) # 4-byte Folded Reload -; CHECK-NEXT: stw 30, 0(3) -; CHECK-NEXT: bl test_func2 -; CHECK-NEXT: addi 3, 1, 32 -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: li 5, 20 -; CHECK-NEXT: li 6, 1 -; CHECK-NEXT: bl test_memset -; CHECK-NEXT: evldd 4, 24(1) # 8-byte Folded Reload -; CHECK-NEXT: li 5, 264 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload -; CHECK-NEXT: li 5, 256 -; CHECK-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: evldd 29, 248(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 28, 240(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 27, 232(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 26, 224(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 25, 216(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 24, 208(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 23, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: 
evldd 22, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 21, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 20, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 19, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 18, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 17, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 16, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 15, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 14, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 31, 348(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 30, 344(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 340(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 28, 336(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 27, 332(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 26, 328(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 25, 324(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 24, 320(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 23, 316(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 22, 312(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 21, 308(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 20, 304(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 19, 300(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 18, 296(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 17, 292(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 16, 288(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 15, 284(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 14, 280(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 0, 356(1) -; CHECK-NEXT: addi 1, 1, 352 -; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: blr +define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) #0 { +; SPE-LABEL: test_spill: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr 0 +; SPE-NEXT: stw 0, 4(1) +; SPE-NEXT: stwu 1, -352(1) +; SPE-NEXT: li 5, 256 +; SPE-NEXT: evstddx 30, 1, 5 # 8-byte Folded Spill +; SPE-NEXT: li 5, 264 +; SPE-NEXT: evstddx 31, 1, 5 # 8-byte Folded Spill +; SPE-NEXT: li 5, .LCPI29_0@l +; SPE-NEXT: lis 6, .LCPI29_0@ha +; SPE-NEXT: evlddx 5, 6, 5 +; 
SPE-NEXT: stw 14, 280(1) # 4-byte Folded Spill +; SPE-NEXT: stw 15, 284(1) # 4-byte Folded Spill +; SPE-NEXT: stw 16, 288(1) # 4-byte Folded Spill +; SPE-NEXT: stw 17, 292(1) # 4-byte Folded Spill +; SPE-NEXT: stw 18, 296(1) # 4-byte Folded Spill +; SPE-NEXT: stw 19, 300(1) # 4-byte Folded Spill +; SPE-NEXT: stw 20, 304(1) # 4-byte Folded Spill +; SPE-NEXT: stw 21, 308(1) # 4-byte Folded Spill +; SPE-NEXT: stw 22, 312(1) # 4-byte Folded Spill +; SPE-NEXT: stw 23, 316(1) # 4-byte Folded Spill +; SPE-NEXT: stw 24, 320(1) # 4-byte Folded Spill +; SPE-NEXT: stw 25, 324(1) # 4-byte Folded Spill +; SPE-NEXT: stw 26, 328(1) # 4-byte Folded Spill +; SPE-NEXT: stw 27, 332(1) # 4-byte Folded Spill +; SPE-NEXT: stw 28, 336(1) # 4-byte Folded Spill +; SPE-NEXT: stw 29, 340(1) # 4-byte Folded Spill +; SPE-NEXT: stw 30, 344(1) # 4-byte Folded Spill +; SPE-NEXT: stw 31, 348(1) # 4-byte Folded Spill +; SPE-NEXT: evstdd 14, 128(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 15, 136(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 16, 144(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 17, 152(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 18, 160(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 19, 168(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 20, 176(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 21, 184(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 22, 192(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 23, 200(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 24, 208(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 25, 216(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 26, 224(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 27, 232(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 28, 240(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 29, 248(1) # 8-byte Folded Spill +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: lwz 4, 360(1) +; SPE-NEXT: efdadd 3, 3, 3 +; SPE-NEXT: efdadd 3, 3, 5 +; SPE-NEXT: evstdd 3, 24(1) # 8-byte Folded Spill +; SPE-NEXT: stw 4, 20(1) # 4-byte Folded Spill +; SPE-NEXT: #APP +; SPE-NEXT: #NO_APP +; 
SPE-NEXT: addi 3, 1, 76 +; SPE-NEXT: li 4, 0 +; SPE-NEXT: li 5, 24 +; SPE-NEXT: li 6, 1 +; SPE-NEXT: li 30, 0 +; SPE-NEXT: bl test_memset +; SPE-NEXT: lwz 3, 20(1) # 4-byte Folded Reload +; SPE-NEXT: stw 30, 0(3) +; SPE-NEXT: bl test_func2 +; SPE-NEXT: addi 3, 1, 32 +; SPE-NEXT: li 4, 0 +; SPE-NEXT: li 5, 20 +; SPE-NEXT: li 6, 1 +; SPE-NEXT: bl test_memset +; SPE-NEXT: evldd 4, 24(1) # 8-byte Folded Reload +; SPE-NEXT: li 5, 264 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload +; SPE-NEXT: li 5, 256 +; SPE-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evldd 29, 248(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 28, 240(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 27, 232(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 26, 224(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 25, 216(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 24, 208(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 23, 200(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 22, 192(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 21, 184(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 20, 176(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 19, 168(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 18, 160(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 17, 152(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 16, 144(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 15, 136(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 14, 128(1) # 8-byte Folded Reload +; SPE-NEXT: lwz 31, 348(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 30, 344(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 29, 340(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 28, 336(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 27, 332(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 26, 328(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 25, 324(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 24, 320(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 23, 316(1) # 4-byte Folded Reload 
+; SPE-NEXT: lwz 22, 312(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 21, 308(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 20, 304(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 19, 300(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 18, 296(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 17, 292(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 16, 288(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 15, 284(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 14, 280(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 0, 356(1) +; SPE-NEXT: addi 1, 1, 352 +; SPE-NEXT: mtlr 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_spill: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -176(1) +; EFPU2-NEXT: mr 5, 3 +; EFPU2-NEXT: mr 6, 4 +; EFPU2-NEXT: stw 27, 156(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 28, 160(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 29, 164(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 168(1) # 4-byte Folded Spill +; EFPU2-NEXT: evstdd 27, 104(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 28, 112(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 29, 120(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 30, 128(1) # 8-byte Folded Spill +; EFPU2-NEXT: lwz 28, 184(1) +; EFPU2-NEXT: bl __adddf3 +; EFPU2-NEXT: lis 5, 16393 +; EFPU2-NEXT: lis 6, -4069 +; EFPU2-NEXT: ori 5, 5, 8697 +; EFPU2-NEXT: ori 6, 6, 34414 +; EFPU2-NEXT: #APP +; EFPU2-NEXT: #NO_APP +; EFPU2-NEXT: bl __adddf3 +; EFPU2-NEXT: mr 30, 3 +; EFPU2-NEXT: mr 29, 4 +; EFPU2-NEXT: addi 3, 1, 52 +; EFPU2-NEXT: li 4, 0 +; EFPU2-NEXT: li 5, 24 +; EFPU2-NEXT: li 6, 1 +; EFPU2-NEXT: li 27, 0 +; EFPU2-NEXT: bl test_memset +; EFPU2-NEXT: stw 27, 0(28) +; EFPU2-NEXT: bl test_func2 +; EFPU2-NEXT: addi 3, 1, 8 +; EFPU2-NEXT: li 4, 0 +; EFPU2-NEXT: li 5, 20 +; EFPU2-NEXT: li 6, 1 +; EFPU2-NEXT: bl test_memset +; EFPU2-NEXT: mr 3, 30 +; EFPU2-NEXT: mr 4, 29 +; EFPU2-NEXT: evldd 30, 128(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 29, 120(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 28, 112(1) # 8-byte 
Folded Reload +; EFPU2-NEXT: evldd 27, 104(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 30, 168(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 164(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 160(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 27, 156(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 180(1) +; EFPU2-NEXT: addi 1, 1, 176 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v1 = alloca [13 x i32], align 4 %v2 = alloca [11 x i32], align 4 @@ -1357,49 +1861,81 @@ return: } define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { -; CHECK-LABEL: test_fma: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) -; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset lr, 4 -; CHECK-NEXT: .cfi_offset r29, -12 -; CHECK-NEXT: .cfi_offset r30, -8 -; CHECK-NEXT: .cfi_offset r29, -40 -; CHECK-NEXT: .cfi_offset r30, -32 -; CHECK-NEXT: cmpwi 3, 1 -; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill -; CHECK-NEXT: blt 0, .LBB57_3 -; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: mr 30, 3 -; CHECK-NEXT: li 29, 0 -; CHECK-NEXT: # implicit-def: $r5 -; CHECK-NEXT: .LBB57_2: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: efscfsi 3, 29 -; CHECK-NEXT: mr 4, 3 -; CHECK-NEXT: bl fmaf -; CHECK-NEXT: addi 29, 29, 1 -; CHECK-NEXT: cmplw 30, 29 -; CHECK-NEXT: mr 5, 3 -; CHECK-NEXT: bne 0, .LBB57_2 -; CHECK-NEXT: b .LBB57_4 -; CHECK-NEXT: .LBB57_3: -; CHECK-NEXT: # implicit-def: $r5 -; CHECK-NEXT: .LBB57_4: # %for.cond.cleanup -; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: mr 3, 5 -; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 0, 52(1) -; CHECK-NEXT: addi 1, 1, 48 -; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: blr 
+; SPE-LABEL: test_fma: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr 0 +; SPE-NEXT: stw 0, 4(1) +; SPE-NEXT: stwu 1, -48(1) +; SPE-NEXT: cmpwi 3, 1 +; SPE-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; SPE-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; SPE-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill +; SPE-NEXT: blt 0, .LBB30_3 +; SPE-NEXT: # %bb.1: # %for.body.preheader +; SPE-NEXT: mr 30, 3 +; SPE-NEXT: li 29, 0 +; SPE-NEXT: # implicit-def: $r5 +; SPE-NEXT: .LBB30_2: # %for.body +; SPE-NEXT: # +; SPE-NEXT: efscfsi 3, 29 +; SPE-NEXT: mr 4, 3 +; SPE-NEXT: bl fmaf +; SPE-NEXT: addi 29, 29, 1 +; SPE-NEXT: cmplw 30, 29 +; SPE-NEXT: mr 5, 3 +; SPE-NEXT: bne 0, .LBB30_2 +; SPE-NEXT: b .LBB30_4 +; SPE-NEXT: .LBB30_3: +; SPE-NEXT: # implicit-def: $r5 +; SPE-NEXT: .LBB30_4: # %for.cond.cleanup +; SPE-NEXT: evldd 30, 16(1) # 8-byte Folded Reload +; SPE-NEXT: mr 3, 5 +; SPE-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; SPE-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 0, 52(1) +; SPE-NEXT: addi 1, 1, 48 +; SPE-NEXT: mtlr 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_fma: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -48(1) +; EFPU2-NEXT: cmpwi 3, 1 +; EFPU2-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; EFPU2-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill +; EFPU2-NEXT: blt 0, .LBB30_3 +; EFPU2-NEXT: # %bb.1: # %for.body.preheader +; EFPU2-NEXT: mr 30, 3 +; EFPU2-NEXT: li 29, 0 +; EFPU2-NEXT: # implicit-def: $r5 +; EFPU2-NEXT: .LBB30_2: # %for.body +; EFPU2-NEXT: # +; EFPU2-NEXT: efscfsi 3, 29 +; EFPU2-NEXT: mr 4, 3 +; EFPU2-NEXT: bl fmaf +; EFPU2-NEXT: addi 29, 29, 1 +; EFPU2-NEXT: cmplw 30, 29 +; EFPU2-NEXT: mr 5, 3 +; EFPU2-NEXT: bne 0, .LBB30_2 +; EFPU2-NEXT: b .LBB30_4 +; EFPU2-NEXT: .LBB30_3: +; EFPU2-NEXT: # 
implicit-def: $r5 +; EFPU2-NEXT: .LBB30_4: # %for.cond.cleanup +; EFPU2-NEXT: evldd 30, 16(1) # 8-byte Folded Reload +; EFPU2-NEXT: mr 3, 5 +; EFPU2-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 52(1) +; EFPU2-NEXT: addi 1, 1, 48 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %cmp8 = icmp sgt i32 %d, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -1427,53 +1963,98 @@ attributes #1 = { nounwind readnone speculatable willreturn } declare i32 @foo(double) -define void @d(%struct.a* %e, %struct.a* %f) { -; CHECK-LABEL: d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) -; CHECK-NEXT: stwu 1, -64(1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 4 -; CHECK-NEXT: .cfi_offset r28, -16 -; CHECK-NEXT: .cfi_offset r29, -12 -; CHECK-NEXT: .cfi_offset r30, -8 -; CHECK-NEXT: .cfi_offset r28, -48 -; CHECK-NEXT: .cfi_offset r29, -40 -; CHECK-NEXT: .cfi_offset r30, -32 -; CHECK-NEXT: lwz 4, 0(4) -; CHECK-NEXT: lwz 3, 0(3) -; CHECK-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill -; CHECK-NEXT: efdcfs 29, 4 -; CHECK-NEXT: stw 28, 48(1) # 4-byte Folded Spill -; CHECK-NEXT: mr 4, 29 -; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill -; CHECK-NEXT: efdcfs 30, 3 -; CHECK-NEXT: evmergehi 3, 29, 29 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: bl foo -; CHECK-NEXT: mr 28, 3 -; CHECK-NEXT: evmergehi 3, 30, 30 -; CHECK-NEXT: mr 4, 30 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: bl foo -; CHECK-NEXT: efdcfsi 3, 28 -; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload -; CHECK-NEXT: efdmul 3, 29, 3 -; CHECK-NEXT: efscfd 3, 3 -; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload -; CHECK-NEXT: stw 3, 0(3) -; 
CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 52(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 28, 48(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 0, 68(1) -; CHECK-NEXT: addi 1, 1, 64 -; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: blr +define void @d(%struct.a* %e, %struct.a* %f) #0 { +; SPE-LABEL: d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr 0 +; SPE-NEXT: stw 0, 4(1) +; SPE-NEXT: stwu 1, -64(1) +; SPE-NEXT: lwz 4, 0(4) +; SPE-NEXT: lwz 3, 0(3) +; SPE-NEXT: stw 29, 52(1) # 4-byte Folded Spill +; SPE-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill +; SPE-NEXT: efdcfs 29, 4 +; SPE-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; SPE-NEXT: mr 4, 29 +; SPE-NEXT: stw 30, 56(1) # 4-byte Folded Spill +; SPE-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill +; SPE-NEXT: efdcfs 30, 3 +; SPE-NEXT: evmergehi 3, 29, 29 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl foo +; SPE-NEXT: mr 28, 3 +; SPE-NEXT: evmergehi 3, 30, 30 +; SPE-NEXT: mr 4, 30 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl foo +; SPE-NEXT: efdcfsi 3, 28 +; SPE-NEXT: evldd 30, 32(1) # 8-byte Folded Reload +; SPE-NEXT: efdmul 3, 29, 3 +; SPE-NEXT: efscfd 3, 3 +; SPE-NEXT: evldd 29, 24(1) # 8-byte Folded Reload +; SPE-NEXT: stw 3, 0(3) +; SPE-NEXT: evldd 28, 16(1) # 8-byte Folded Reload +; SPE-NEXT: lwz 30, 56(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 29, 52(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 28, 48(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 0, 68(1) +; SPE-NEXT: addi 1, 1, 64 +; SPE-NEXT: mtlr 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: d: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -96(1) +; EFPU2-NEXT: lwz 3, 0(3) +; EFPU2-NEXT: stw 26, 72(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 27, 76(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 28, 80(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 29, 84(1) # 4-byte 
Folded Spill +; EFPU2-NEXT: stw 30, 88(1) # 4-byte Folded Spill +; EFPU2-NEXT: evstdd 26, 16(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 27, 24(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 29, 40(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 30, 48(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 30, 4 +; EFPU2-NEXT: bl __extendsfdf2 +; EFPU2-NEXT: mr 28, 3 +; EFPU2-NEXT: lwz 3, 0(30) +; EFPU2-NEXT: mr 29, 4 +; EFPU2-NEXT: bl __extendsfdf2 +; EFPU2-NEXT: mr 30, 4 +; EFPU2-NEXT: mr 27, 3 +; EFPU2-NEXT: bl foo +; EFPU2-NEXT: mr 26, 3 +; EFPU2-NEXT: mr 3, 28 +; EFPU2-NEXT: mr 4, 29 +; EFPU2-NEXT: bl foo +; EFPU2-NEXT: mr 3, 26 +; EFPU2-NEXT: bl __floatsidf +; EFPU2-NEXT: mr 6, 4 +; EFPU2-NEXT: mr 5, 3 +; EFPU2-NEXT: mr 3, 27 +; EFPU2-NEXT: mr 4, 30 +; EFPU2-NEXT: bl __muldf3 +; EFPU2-NEXT: bl __truncdfsf2 +; EFPU2-NEXT: stw 3, 0(3) +; EFPU2-NEXT: evldd 30, 48(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 29, 40(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 26, 16(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 30, 88(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 84(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 80(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 27, 76(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 26, 72(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 100(1) +; EFPU2-NEXT: addi 1, 1, 96 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0 %1 = load float, float* undef @@ -1488,3 +2069,26 @@ entry: store float %l, float* undef ret void } +attributes #0 = { nounwind } + +;--- hwdouble.ll +; split into separate file because the efd* instructions are invalid on efpu2 +define i32 @test_dasmconst(double %x) #0 { +; SPE-LABEL: test_dasmconst: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evstdd 3, 
8(1) +; SPE-NEXT: #APP +; SPE-NEXT: efdctsi 3, 3 +; SPE-NEXT: #NO_APP +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +entry: + %x.addr = alloca double, align 8 + store double %x, double* %x.addr, align 8 + %0 = load double, double* %x.addr, align 8 + %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0) + ret i32 %1 +} +attributes #0 = { nounwind } From 4086072f8a9200216088c435c9aa90a2d8ed74a5 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Mon, 11 Jan 2021 08:50:00 -0500 Subject: [PATCH 31/86] Reland "[mlir][linalg] Support parsing attributes in named op spec" With this, now we can specify a list of attributes on named ops generated from the spec. The format is defined as ``` attr-id ::= bare-id (`?`)? attr-typedef ::= type (`[` `]`)? attr-def ::= attr-id `:` attr-typedef tc-attr-def ::= `attr` `(` attr-def-list `)` tc-def ::= `def` bare-id `(`tensor-def-list`)` `->` `(` tensor-def-list`)` (tc-attr-def)? ``` For example, ``` ods_def def some_op(...) -> (...) attr( f32_attr: f32, i32_attr: i32, array_attr : f32[], optional_attr? : f32 ) ``` where `?` means optional attribute and `[]` means array type. 
Reviewed By: hanchung, nicolasvasilache Differential Revision: https://reviews.llvm.org/D94240 --- .../test-linalg-ods-gen.tc | 22 +++ .../mlir-linalg-ods-gen.cpp | 181 +++++++++++++++++- 2 files changed, 199 insertions(+), 4 deletions(-) diff --git a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc index f81380f02bb382..1ef12876063778 100644 --- a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc +++ b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc @@ -72,3 +72,25 @@ ods_def : def test3(A: f32(Batch, M, K), B: f32(K, N)) -> (C: f32(Batch, M, N)) { C(b, m, n) = std_addf(std_mulf(A(b, m, k), B(k, n))); } + +// Test attribute definitions +// ODS-LABEL: def Test4Op +// ODS: F32ArrayAttr:$array_attr, +// ODS: F32:$f32_attr, +// ODS: RankedF32ElementsAttr<[4]>:$fvec_attr, +// ODS: I32:$i32_attr, +// ODS: RankedI32ElementsAttr<[5, 6]>:$ivec_attr, +// ODS: OptionalAttr:$optional_attr +// +ods_def : +def test4(A: f32(Batch, M, K), B: f32(K, N)) -> (C: f32(Batch, M, N)) +attr( + f32_attr: f32, + i32_attr: i32, + fvec_attr: 4xf32, + ivec_attr: 5x6xi32, + array_attr : f32[], + optional_attr? 
: f32 +) { + C(b, m, n) = std_addf(std_mulf(A(b, m, k), B(k, n))); +} diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp index 592e6cb774fbf6..138c5a4e904e77 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp @@ -20,11 +20,17 @@ #include "mlir/Support/LLVM.h" #include "mlir/Support/LogicalResult.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ToolOutputFile.h" +#include + #define DEBUG_TYPE "linalg-ods-gen" static llvm::cl::OptionCategory ODSGenCat("Linalg ODS Gen"); @@ -79,11 +85,14 @@ class Token { gt, l_brace, l_paren, + l_square, lt, minus, plus, + question, r_brace, r_paren, + r_square, semicolon, star, @@ -91,6 +100,7 @@ class Token { kw_def, FIRST_KEYWORD = kw_def, kw_ods_def, + kw_attr_def, kw_floordiv, kw_ceildiv, kw_mod, @@ -151,6 +161,10 @@ class Lexer { Token emitError(llvm::SMLoc loc, const Twine &msg); Token emitError(const char *loc, const Twine &msg); + /// Change the position of the lexer cursor. The next token we lex will start + /// at the designated point in the input. 
+ void resetPointer(const char *newPtr) { curPtr = newPtr; } + private: Token formToken(Token::Kind kind, const char *tokStart) { return Token(kind, StringRef(tokStart, curPtr - tokStart)); @@ -247,10 +261,14 @@ Token Lexer::lexToken() { return formToken(Token::Kind::l_brace, tokStart); case '(': return formToken(Token::Kind::l_paren, tokStart); + case '[': + return formToken(Token::Kind::l_square, tokStart); case '}': return formToken(Token::Kind::r_brace, tokStart); case ')': return formToken(Token::Kind::r_paren, tokStart); + case ']': + return formToken(Token::Kind::r_square, tokStart); case '<': return formToken(Token::Kind::lt, tokStart); case '>': @@ -263,6 +281,8 @@ Token Lexer::lexToken() { return formToken(Token::Kind::semicolon, tokStart); case '*': return formToken(Token::Kind::star, tokStart); + case '?': + return formToken(Token::Kind::question, tokStart); case '/': if (*curPtr == '/') { skipComment(); @@ -289,6 +309,7 @@ Token Lexer::lexIdentifier(const char *tokStart) { // Check to see if this identifier is a keyword. StringRef str(tokStart, curPtr - tokStart); Token::Kind kind = StringSwitch(str) + .Case("attr", Token::Kind::kw_attr_def) .Case("def", Token::Kind::kw_def) .Case("ods_def", Token::Kind::kw_ods_def) .Case("floordiv", Token::Kind::kw_floordiv) @@ -352,29 +373,40 @@ class Parser { "shouldn't advance past EOF or errors"); curToken = lexer.lexToken(); } + void consumeToken(Token::Kind kind) { assert(curToken.getKind() == kind && "unexpected token"); curToken = lexer.lexToken(); } + LogicalResult parseToken(Token::Kind kind, const Twine &msg) { if (curToken.getKind() != kind) return emitError(curToken.getLoc(), msg); consumeToken(); return success(); } + + /// Parses an optional token and returns failure if failed to parse. 
+ LogicalResult parseOptionalToken(Token::Kind kind) { + return success(consumeIf(kind)); + } + LogicalResult emitError(llvm::SMLoc loc, const Twine &msg) { lexer.emitError(loc, msg); return failure(); } + LogicalResult emitError(const Twine &msg) { return emitError(curToken.getLoc(), msg); } + bool consumeIf(Token::Kind kind) { if (curToken.isNot(kind)) return false; consumeToken(kind); return true; } + LogicalResult parseCommaSeparatedList(llvm::function_ref parseElement) { // Non-empty case starts with an element. @@ -388,6 +420,7 @@ class Parser { } return success(); } + LogicalResult parseCommaSeparatedListUntil(Token::Kind rightToken, llvm::function_ref parseElement, @@ -961,6 +994,8 @@ class TCParser { LogicalResult parseTensorUse(TensorUse &result, ComprehensionParsingState &state); + LogicalResult parseAttrDef(); + /// Parses a tensor expression. LogicalResult parseExpression(TensorUse currentDefinition, std::unique_ptr &result, @@ -1010,15 +1045,29 @@ class TCParser { unsigned index; }; + //===--------------------------------------------------------------------===// + // Internal bookkeeping of attributes. + //===--------------------------------------------------------------------===// + struct RegisteredAttr { + StringRef elementType; + SmallVector vectorDims; + bool isArray; + bool isOptional; + }; + //===--------------------------------------------------------------------===// // Per-TC def state. //===--------------------------------------------------------------------===// /// Symbols are per TC def. AffineSymbolList symbols; + /// Tensors are per TC def. llvm::StringMap registeredTensors; unsigned nextRegisteredTensorIndex; + /// Attributes are per TC def. 
+ std::map registeredAttrs; + Parser &parser; }; } // namespace @@ -1170,6 +1219,73 @@ LogicalResult TCParser::parseTensorUse(TensorUse &result, return success(); } +/// Parse the information for an attribute def of the form: +/// +/// affine-expr-list ::= affine-expr (`,` affine-expr )* +/// attr-id ::= bare-id (`?`)? +/// dim-list ::= (integer-literal 'x')+ +/// attr-typedef ::= dim-list? type (`[` `]`)? +/// attr-def ::= attr-id `:` attr-typedef +LogicalResult TCParser::parseAttrDef() { + auto attrLoc = parser.curToken.getLoc(); + StringRef attrName = parser.curToken.getSpelling(); + if (failed(parser.parseToken(Token::Kind::id, "expected an id"))) + return failure(); + bool isOptional = succeeded(parser.parseOptionalToken(Token::Kind::question)); + if (failed(parser.parseToken(Token::Kind::colon, "expected colon"))) + return failure(); + + // Parse the attribute's type. We don't expect the type to be arbitrary + // complex, so just use this ad-hoc handling here. + + // Parse potential dimension list + SmallVector vectorDims; + while (parser.curToken.is(Token::Kind::integer)) { + vectorDims.push_back(parser.curToken.getUInt64IntegerValue().getValue()); + parser.consumeToken(); + + StringRef spelling = parser.curToken.getSpelling(); + if (spelling[0] != 'x') + return parser.emitError(parser.curToken.getLoc(), + "expected 'x' in dimension list"); + + // If we had a prefix of 'x', lex the next token immediately after the 'x'. 
+ if (spelling.size() != 1) + parser.lexer.resetPointer(spelling.data() + 1); + + parser.consumeToken(); + } + + StringRef elementType = parser.curToken.getSpelling(); + if (failed(parser.parseToken(Token::Kind::id, "expected an id"))) + return failure(); + + bool isArray = false; + auto arrayLoc = parser.curToken.getLoc(); + if (succeeded(parser.parseOptionalToken(Token::Kind::l_square))) { + isArray = true; + if (failed(parser.parseToken(Token::Kind::r_square, "expected ']'"))) + return failure(); + } + + if (!vectorDims.empty() && isArray) + return parser.emitError(arrayLoc, "unsupported vector array attribute"); + + auto iterBoolPair = registeredAttrs.emplace( + attrName.str(), + RegisteredAttr{elementType, vectorDims, isArray, isOptional}); + if (!iterBoolPair.second) + return parser.emitError(attrLoc, + "Failed to register attribute '" + attrName + "'"); + + LLVM_DEBUG(llvm::dbgs() << "Recorded: " << (isOptional ? "[optional]" : "") + << " " << attrName << " " + << "with type: " << elementType + << (isArray ? "[]" : "") << "\n"); + + return success(); +} + /// Parses a tensor expression of the form: /// /// op-spec ::= bare-id `<` reduction-dims-list `>` @@ -1341,10 +1457,13 @@ TCParser::parseOneComprehension(StringRef cppOpName, StringRef linalgOpName, /// Parse and print the information for a ODS def. /// /// tensor-def-list ::= tensor-def (`,` tensor-def )* +/// attr-def-list ::= attr-def (`,` attr-def )* /// /// comprehension-list ::= comprehension comprehension* /// +/// tc-attr-def ::= `attr` `(` attr-def-list `)` /// tc-def ::= `def` bare-id `(`tensor-def-list`)` `->` `(` tensor-def-list`)` +/// (tc-attr-def)? /// `{` comprehension-list `}` /// /// ods-def ::= `ods_def` `<` bare-id `>` `:` tc-def @@ -1353,6 +1472,7 @@ TCParser::parseOneComprehension(StringRef cppOpName, StringRef linalgOpName, /// contain only expressions involving symbols and constants), but can /// otherwise contain arbitrary affine expressions. 
LogicalResult TCParser::parseAndEmitODSDef(llvm::raw_ostream &os) { + // Parse def header (including C++ op name) if (failed(parser.parseToken(Token::Kind::kw_ods_def, "expected 'ods_def' to define a TC ODS")) || failed(parser.parseToken(Token::Kind::lt, "expected '<'"))) @@ -1364,12 +1484,15 @@ LogicalResult TCParser::parseAndEmitODSDef(llvm::raw_ostream &os) { failed(parser.parseToken(Token::Kind::gt, "expected '>'")) || failed(parser.parseToken(Token::Kind::colon, "expected ':'"))) return failure(); + if (failed(parser.parseToken(Token::Kind::kw_def, "expected 'def' to define a TC"))) return failure(); StringRef tcName = parser.curToken.getSpelling(); LLVM_DEBUG(llvm::dbgs() << "\n\nStart parsing TC: " << tcName << "\n"); + + // Parse input/output tensor definitions if (failed(parser.parseToken(Token::Kind::id, "expected id")) || failed(parser.parseToken(Token::Kind::l_paren, "expected '('"))) return failure(); @@ -1392,6 +1515,16 @@ LogicalResult TCParser::parseAndEmitODSDef(llvm::raw_ostream &os) { Token::Kind::r_paren, parseOutputDef, /*allowEmptyList=*/false))) return failure(); + // Parse optional attribute definitions + if (succeeded(parser.parseOptionalToken(Token::Kind::kw_attr_def))) { + if (failed(parser.parseToken(Token::Kind::l_paren, "expected '('"))) + return failure(); + if (failed(parser.parseCommaSeparatedListUntil( + Token::Kind::r_paren, std::bind(&TCParser::parseAttrDef, this), + /*allowEmptyList=*/false))) + return failure(); + } + // Since we don't declare symbols separately, we discover them eagerly: each // newly encountered id in a tensor shape expression is treated as a new // symbolic. 
At this point, all tensors have been parsed and all the symbols @@ -1450,12 +1583,52 @@ LogicalResult TCParser::parseAndEmitODSDef(llvm::raw_ostream &os) { void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, StringRef linalgOpName, ComprehensionParsingState &state) { + SmallVector attributes; + for (const auto &attr : registeredAttrs) { + llvm::StringRef name = attr.first; + + llvm::StringRef elementType = attr.second.elementType; + std::string odsType = llvm::StringSwitch(elementType) + .Case("f32", "F32") + .Case("i32", "I32") + .Default(""); + if (odsType.empty()) { + parser.emitError("unimplemented support for attribute element type: " + + elementType); + return; + } + + const auto &dims = attr.second.vectorDims; + if (!dims.empty()) { + SmallVector dimStrs; + for (uint64_t dim : dims) + dimStrs.push_back(std::to_string(dim)); + odsType = llvm::formatv("Ranked{0}ElementsAttr<[{1}]>", odsType, + llvm::join(dimStrs, ", ")); + } + + assert(dims.empty() || !attr.second.isArray); + if (attr.second.isArray) + odsType = llvm::formatv("{0}ArrayAttr", odsType); + + if (attr.second.isOptional) + odsType = llvm::formatv("OptionalAttr<{0}>", odsType); + + attributes.push_back(llvm::formatv("{0}:${1}", odsType, name)); + } + + std::string attrList = llvm::join(attributes, ",\n"); + if (!attrList.empty()) + attrList = ",\n" + attrList; + const char *header = R"FMT( def {0} : LinalgStructuredBase_Op<"{1}", [ AttrSizedOperandSegments, DeclareOpInterfaceMethods, SingleBlockImplicitTerminator<"YieldOp">]> { - let arguments = (ins Variadic:$inputs, - Variadic:$outputs); + let arguments = (ins + Variadic:$inputs, + Variadic:$outputs{4} + ); let results = (outs Variadic:$result_tensors); let regions = (region AnyRegion:$region); @@ -1515,7 +1688,7 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, static std::function getRegionBuilder() {{ return regionBuilder; } // Generic methods. 
- static unsigned getNumRegionArgs() {{ return {4}; } + static unsigned getNumRegionArgs() {{ return {5}; } std::string getLibraryCallName() {{ return generateLibraryCallName(getOperation()); } @@ -1531,7 +1704,7 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, } os << llvm::formatv(header, cppOpName, linalgOpName, nInputs, nOutputs, - state.orderedTensorArgs.size()); + attrList, state.orderedTensorArgs.size()); } /// Print the C++ StructuredOpsInterface impl of `iterator_types`. From 4fa01f72de6cc48a44afe057c04803711160c92d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 12 Jan 2021 17:02:56 +0100 Subject: [PATCH 32/86] [mlir][CAPI] Fix inline function declaration Add `static` keyword, otherwise build fail with linker error for some cases. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D94496 --- mlir/include/mlir-c/AffineExpr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir-c/AffineExpr.h b/mlir/include/mlir-c/AffineExpr.h index ec445682c0110a..d5c6e7b9f29e7a 100644 --- a/mlir/include/mlir-c/AffineExpr.h +++ b/mlir/include/mlir-c/AffineExpr.h @@ -50,7 +50,7 @@ MLIR_CAPI_EXPORTED bool mlirAffineExprEqual(MlirAffineExpr lhs, /// Returns `true` if the given affine expression is a null expression. Note /// constant zero is not a null expression. -inline bool mlirAffineExprIsNull(MlirAffineExpr affineExpr) { +inline static bool mlirAffineExprIsNull(MlirAffineExpr affineExpr) { return affineExpr.ptr == NULL; } From 9667d15e7496e6d8c313251f22ac157dbbd0c1c2 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 12 Jan 2021 17:06:06 +0100 Subject: [PATCH 33/86] [mlir] Fix for LIT tests Add `MLIR_SPIRV_CPU_RUNNER_ENABLED` to `llvm_canonicalize_cmake_booleans`. 
Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D94407 --- mlir/test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index f6d5af14163036..293d93268a1118 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -12,6 +12,7 @@ llvm_canonicalize_cmake_booleans( MLIR_CUDA_RUNNER_ENABLED MLIR_ROCM_CONVERSIONS_ENABLED MLIR_ROCM_RUNNER_ENABLED + MLIR_SPIRV_CPU_RUNNER_ENABLED MLIR_VULKAN_RUNNER_ENABLED ) From 1f1250151f222ba391d05dcc173f4b6c65d05ca2 Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Tue, 12 Jan 2021 17:06:58 +0100 Subject: [PATCH 34/86] [libc++] [C++2b] [P1048] Add is_scoped_enum and is_scoped_enum_v. * https://wg21.link/p1048 Reviewed By: ldionne, #libc Differential Revision: https://reviews.llvm.org/D94409 --- libcxx/docs/Cxx2bStatusPaperStatus.csv | 2 +- libcxx/docs/FeatureTestMacroTable.rst | 2 +- libcxx/include/type_traits | 22 ++++ libcxx/include/version | 2 +- .../type_traits.version.pass.cpp | 16 +-- .../version.version.pass.cpp | 16 +-- .../meta.unary.prop/is_scoped_enum.pass.cpp | 120 ++++++++++++++++++ .../generate_feature_test_macro_components.py | 1 - 8 files changed, 155 insertions(+), 26 deletions(-) create mode 100644 libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_scoped_enum.pass.cpp diff --git a/libcxx/docs/Cxx2bStatusPaperStatus.csv b/libcxx/docs/Cxx2bStatusPaperStatus.csv index c79509528addd2..f5c893fdbd4819 100644 --- a/libcxx/docs/Cxx2bStatusPaperStatus.csv +++ b/libcxx/docs/Cxx2bStatusPaperStatus.csv @@ -1,6 +1,6 @@ "Paper #","Group","Paper Name","Meeting","Status","First released version" "`P0881R7 `__","LWG","A Proposal to add stacktrace library","Autumn 2020","","" "`P0943R6 `__","LWG","Support C atomics in C++","Autumn 2020","","" -"`P1048R1 `__","LWG","A proposal for a type trait to detect scoped enumerations","Autumn 2020","","" +"`P1048R1 `__","LWG","A proposal for a type trait to detect scoped 
enumerations","Autumn 2020","|Complete|","12.0" "`P1679R3 `__","LWG","string contains function","Autumn 2020","","" "","","","","","" diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 99fb4e790c7d6d..8221bbe2a4afe4 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -292,7 +292,7 @@ Status ------------------------------------------------- ----------------- **C++ 2b** ------------------------------------------------------------------- - ``__cpp_lib_is_scoped_enum`` *unimplemented* + ``__cpp_lib_is_scoped_enum`` ``202011L`` ------------------------------------------------- ----------------- ``__cpp_lib_stacktrace`` *unimplemented* ------------------------------------------------- ----------------- diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 99b2a8f9f025f9..48884eab8e86a0 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -51,6 +51,7 @@ namespace std template struct is_arithmetic; template struct is_fundamental; template struct is_member_pointer; + template struct is_scoped_enum; // C++2b template struct is_scalar; template struct is_object; template struct is_compound; @@ -284,6 +285,8 @@ namespace std = is_compound::value; // C++17 template inline constexpr bool is_member_pointer_v = is_member_pointer::value; // C++17 + template inline constexpr bool is_scoped_enum_v + = is_scoped_enum::value; // C++2b // See C++14 20.10.4.3, type properties template inline constexpr bool is_const_v @@ -4177,6 +4180,25 @@ struct __has_operator_addressof #endif // _LIBCPP_CXX03_LANG +// is_scoped_enum [meta.unary.prop] + +#if _LIBCPP_STD_VER > 20 +template > +struct __is_scoped_enum_helper : false_type {}; + +template +struct __is_scoped_enum_helper<_Tp, true> + : public bool_constant > > {}; + +template +struct _LIBCPP_TEMPLATE_VIS is_scoped_enum + : public __is_scoped_enum_helper<_Tp> {}; + +template +_LIBCPP_INLINE_VAR 
_LIBCPP_CONSTEXPR bool is_scoped_enum_v = + is_scoped_enum<_Tp>::value; +#endif + #if _LIBCPP_STD_VER > 14 template diff --git a/libcxx/include/version b/libcxx/include/version index 3920b69a601cda..9e5fc81da44e3d 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -355,7 +355,7 @@ __cpp_lib_void_t 201411L #endif #if _LIBCPP_STD_VER > 20 -// # define __cpp_lib_is_scoped_enum 202011L +# define __cpp_lib_is_scoped_enum 202011L // # define __cpp_lib_stacktrace 202011L // # define __cpp_lib_stdatomic_h 202011L // # define __cpp_lib_string_contains 202011L diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.pass.cpp index 04c6aaa81fd702..41bc22f6a330cf 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.pass.cpp @@ -638,17 +638,11 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_is_scoped_enum -# error "__cpp_lib_is_scoped_enum should be defined in c++2b" -# endif -# if __cpp_lib_is_scoped_enum != 202011L -# error "__cpp_lib_is_scoped_enum should have the value 202011L in c++2b" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_is_scoped_enum -# error "__cpp_lib_is_scoped_enum should not be defined because it is unimplemented in libc++!" 
-# endif +#ifndef __cpp_lib_is_scoped_enum +#error "__cpp_lib_is_scoped_enum should be defined in c++2b" +#endif +#if __cpp_lib_is_scoped_enum != 202011L +#error "__cpp_lib_is_scoped_enum should have the value 202011L in c++2b" # endif # ifndef __cpp_lib_is_swappable diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp index 664c8fe758393c..3ff920c3a4891e 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp @@ -3983,17 +3983,11 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_is_scoped_enum -# error "__cpp_lib_is_scoped_enum should be defined in c++2b" -# endif -# if __cpp_lib_is_scoped_enum != 202011L -# error "__cpp_lib_is_scoped_enum should have the value 202011L in c++2b" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_is_scoped_enum -# error "__cpp_lib_is_scoped_enum should not be defined because it is unimplemented in libc++!" -# endif +#ifndef __cpp_lib_is_scoped_enum +#error "__cpp_lib_is_scoped_enum should be defined in c++2b" +#endif +#if __cpp_lib_is_scoped_enum != 202011L +#error "__cpp_lib_is_scoped_enum should have the value 202011L in c++2b" # endif # ifndef __cpp_lib_is_swappable diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_scoped_enum.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_scoped_enum.pass.cpp new file mode 100644 index 00000000000000..ad50755a00cd16 --- /dev/null +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_scoped_enum.pass.cpp @@ -0,0 +1,120 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++2a + +// type_traits + +// is_scoped_enum // C++2b + +#include +#include // for std::nullptr_t +#include "test_macros.h" + +template +void test_positive() { + static_assert(std::is_scoped_enum::value); + static_assert(std::is_scoped_enum::value); + static_assert(std::is_scoped_enum::value); + static_assert(std::is_scoped_enum::value); + + static_assert(std::is_scoped_enum_v); + static_assert(std::is_scoped_enum_v); + static_assert(std::is_scoped_enum_v); + static_assert(std::is_scoped_enum_v); +} + +template +void test_negative() { + static_assert(!std::is_scoped_enum::value); + static_assert(!std::is_scoped_enum::value); + static_assert(!std::is_scoped_enum::value); + static_assert(!std::is_scoped_enum::value); + + static_assert(!std::is_scoped_enum_v); + static_assert(!std::is_scoped_enum_v); + static_assert(!std::is_scoped_enum_v); + static_assert(!std::is_scoped_enum_v); +} + +class Empty {}; + +class NotEmpty { + virtual ~NotEmpty(); +}; + +union Union {}; + +struct bit_zero { + int : 0; +}; + +class Abstract { + virtual ~Abstract() = 0; +}; + +enum Enum { zero, one }; +enum class CEnum1 { zero, one }; +enum class CEnum2; +enum class CEnum3 : short; +struct incomplete_type; + +using FunctionPtr = void (*)(); +using FunctionType = void(); + +struct TestMembers { + static int static_method(int) { return 0; } + int method() { return 0; } + + enum E1 { m_zero, m_one }; + enum class CE1; +}; + +void func1(); +int func2(int); + +int main(int, char**) { + test_positive(); + test_positive(); + test_positive(); + test_positive(); + + test_negative(); + test_negative(); + + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + 
test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + test_negative(); + + return 0; +} diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index daa40f2a15ac22..c08771fc056c98 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -368,7 +368,6 @@ def add_version_header(tc): "name": "__cpp_lib_is_scoped_enum", "values": { "c++2b": 202011 }, "headers": ["type_traits"], - "unimplemented": True, }, { "name": "__cpp_lib_is_swappable", "values": { "c++17": 201603 }, From 8349fa0fdd3a372f88ea53de6c906d987c1f4fec Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Tue, 12 Jan 2021 11:11:45 -0500 Subject: [PATCH 35/86] [mlir][spirv] NFC: split deserialization into multiple source files This avoids large source files and gives a better structure. It also allows leveraging compilation parallelism. 
Reviewed By: mravishankar Differential Revision: https://reviews.llvm.org/D94360 --- mlir/lib/Target/CMakeLists.txt | 51 +- mlir/lib/Target/SPIRV/CMakeLists.txt | 28 + .../SPIRV/Deserialization/CMakeLists.txt | 17 + .../SPIRV/Deserialization/Deserialization.cpp | 23 + .../SPIRV/Deserialization/DeserializeOps.cpp | 565 +++++++ .../Deserializer.cpp} | 1303 ++--------------- .../SPIRV/Deserialization/Deserializer.h | 613 ++++++++ .../Target/SPIRV/Serialization/CMakeLists.txt | 15 + .../{ => Serialization}/Serialization.cpp | 0 mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp | 13 +- 10 files changed, 1364 insertions(+), 1264 deletions(-) create mode 100644 mlir/lib/Target/SPIRV/CMakeLists.txt create mode 100644 mlir/lib/Target/SPIRV/Deserialization/CMakeLists.txt create mode 100644 mlir/lib/Target/SPIRV/Deserialization/Deserialization.cpp create mode 100644 mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp rename mlir/lib/Target/SPIRV/{Deserialization.cpp => Deserialization/Deserializer.cpp} (58%) create mode 100644 mlir/lib/Target/SPIRV/Deserialization/Deserializer.h create mode 100644 mlir/lib/Target/SPIRV/Serialization/CMakeLists.txt rename mlir/lib/Target/SPIRV/{ => Serialization}/Serialization.cpp (100%) diff --git a/mlir/lib/Target/CMakeLists.txt b/mlir/lib/Target/CMakeLists.txt index 1b1a02db551104..51a0e78a4edffd 100644 --- a/mlir/lib/Target/CMakeLists.txt +++ b/mlir/lib/Target/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(SPIRV) + add_mlir_translation_library(MLIRTargetLLVMIRModuleTranslation LLVMIR/DebugTranslation.cpp LLVMIR/ModuleTranslation.cpp @@ -132,52 +134,3 @@ add_mlir_translation_library(MLIRTargetROCDLIR MLIRROCDLIR MLIRTargetLLVMIRModuleTranslation ) - -add_mlir_translation_library(MLIRSPIRVBinaryUtils - SPIRV/SPIRVBinaryUtils.cpp - - LINK_LIBS PUBLIC - MLIRIR - MLIRSPIRV - MLIRSupport - ) - -add_mlir_translation_library(MLIRSPIRVSerialization - SPIRV/Serialization.cpp - - DEPENDS - MLIRSPIRVSerializationGen - - LINK_LIBS PUBLIC - MLIRIR - 
MLIRSPIRV - MLIRSPIRVBinaryUtils - MLIRSupport - MLIRTranslation - ) - -add_mlir_translation_library(MLIRSPIRVDeserialization - SPIRV/Deserialization.cpp - - DEPENDS - MLIRSPIRVSerializationGen - - LINK_LIBS PUBLIC - MLIRIR - MLIRSPIRV - MLIRSPIRVBinaryUtils - MLIRSupport - MLIRTranslation - ) - -add_mlir_translation_library(MLIRSPIRVTranslateRegistration - SPIRV/TranslateRegistration.cpp - - LINK_LIBS PUBLIC - MLIRIR - MLIRSPIRV - MLIRSPIRVSerialization - MLIRSPIRVDeserialization - MLIRSupport - MLIRTranslation - ) diff --git a/mlir/lib/Target/SPIRV/CMakeLists.txt b/mlir/lib/Target/SPIRV/CMakeLists.txt new file mode 100644 index 00000000000000..cddbc097133794 --- /dev/null +++ b/mlir/lib/Target/SPIRV/CMakeLists.txt @@ -0,0 +1,28 @@ +add_subdirectory(Deserialization) +add_subdirectory(Serialization) + +set(LLVM_OPTIONAL_SOURCES + SPIRVBinaryUtils.cpp + TranslateRegistration.cpp + ) + +add_mlir_translation_library(MLIRSPIRVBinaryUtils + SPIRVBinaryUtils.cpp + + LINK_LIBS PUBLIC + MLIRIR + MLIRSPIRV + MLIRSupport + ) + +add_mlir_translation_library(MLIRSPIRVTranslateRegistration + TranslateRegistration.cpp + + LINK_LIBS PUBLIC + MLIRIR + MLIRSPIRV + MLIRSPIRVSerialization + MLIRSPIRVDeserialization + MLIRSupport + MLIRTranslation + ) diff --git a/mlir/lib/Target/SPIRV/Deserialization/CMakeLists.txt b/mlir/lib/Target/SPIRV/Deserialization/CMakeLists.txt new file mode 100644 index 00000000000000..99d40e11baa619 --- /dev/null +++ b/mlir/lib/Target/SPIRV/Deserialization/CMakeLists.txt @@ -0,0 +1,17 @@ +add_mlir_translation_library(MLIRSPIRVDeserialization + DeserializeOps.cpp + Deserializer.cpp + Deserialization.cpp + + DEPENDS + MLIRSPIRVSerializationGen + + LINK_LIBS PUBLIC + MLIRIR + MLIRSPIRV + MLIRSPIRVBinaryUtils + MLIRSupport + MLIRTranslation + ) + + diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserialization.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserialization.cpp new file mode 100644 index 00000000000000..2eb08669f65876 --- /dev/null +++ 
b/mlir/lib/Target/SPIRV/Deserialization/Deserialization.cpp @@ -0,0 +1,23 @@ +//===- Deserialization.cpp - MLIR SPIR-V Deserialization ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/SPIRV/Deserialization.h" + +#include "Deserializer.h" + +namespace mlir { +spirv::OwningSPIRVModuleRef spirv::deserialize(ArrayRef binary, + MLIRContext *context) { + Deserializer deserializer(binary, context); + + if (failed(deserializer.deserialize())) + return nullptr; + + return deserializer.collect(); +} +} // namespace mlir diff --git a/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp b/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp new file mode 100644 index 00000000000000..f11804a11a9a45 --- /dev/null +++ b/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp @@ -0,0 +1,565 @@ +//===- DeserializeOps.cpp - MLIR SPIR-V Deserialization (Ops) -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Deserializer methods for SPIR-V binary instructions. 
+// +//===----------------------------------------------------------------------===// + +#include "Deserializer.h" + +#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Location.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +using namespace mlir; + +#define DEBUG_TYPE "spirv-deserialization" + +//===----------------------------------------------------------------------===// +// Utility Functions +//===----------------------------------------------------------------------===// + +/// Extracts the opcode from the given first word of a SPIR-V instruction. +static inline spirv::Opcode extractOpcode(uint32_t word) { + return static_cast(word & 0xffff); +} + +//===----------------------------------------------------------------------===// +// Instruction +//===----------------------------------------------------------------------===// + +Value spirv::Deserializer::getValue(uint32_t id) { + if (auto constInfo = getConstant(id)) { + // Materialize a `spv.constant` op at every use site. 
+ return opBuilder.create(unknownLoc, constInfo->second, + constInfo->first); + } + if (auto varOp = getGlobalVariable(id)) { + auto addressOfOp = opBuilder.create( + unknownLoc, varOp.type(), + opBuilder.getSymbolRefAttr(varOp.getOperation())); + return addressOfOp.pointer(); + } + if (auto constOp = getSpecConstant(id)) { + auto referenceOfOp = opBuilder.create( + unknownLoc, constOp.default_value().getType(), + opBuilder.getSymbolRefAttr(constOp.getOperation())); + return referenceOfOp.reference(); + } + if (auto constCompositeOp = getSpecConstantComposite(id)) { + auto referenceOfOp = opBuilder.create( + unknownLoc, constCompositeOp.type(), + opBuilder.getSymbolRefAttr(constCompositeOp.getOperation())); + return referenceOfOp.reference(); + } + if (auto specConstOperationInfo = getSpecConstantOperation(id)) { + return materializeSpecConstantOperation( + id, specConstOperationInfo->enclodesOpcode, + specConstOperationInfo->resultTypeID, + specConstOperationInfo->enclosedOpOperands); + } + if (auto undef = getUndefType(id)) { + return opBuilder.create(unknownLoc, undef); + } + return valueMap.lookup(id); +} + +LogicalResult +spirv::Deserializer::sliceInstruction(spirv::Opcode &opcode, + ArrayRef &operands, + Optional expectedOpcode) { + auto binarySize = binary.size(); + if (curOffset >= binarySize) { + return emitError(unknownLoc, "expected ") + << (expectedOpcode ? spirv::stringifyOpcode(*expectedOpcode) + : "more") + << " instruction"; + } + + // For each instruction, get its word count from the first word to slice it + // from the stream properly, and then dispatch to the instruction handler. 
+ + uint32_t wordCount = binary[curOffset] >> 16; + + if (wordCount == 0) + return emitError(unknownLoc, "word count cannot be zero"); + + uint32_t nextOffset = curOffset + wordCount; + if (nextOffset > binarySize) + return emitError(unknownLoc, "insufficient words for the last instruction"); + + opcode = extractOpcode(binary[curOffset]); + operands = binary.slice(curOffset + 1, wordCount - 1); + curOffset = nextOffset; + return success(); +} + +LogicalResult spirv::Deserializer::processInstruction( + spirv::Opcode opcode, ArrayRef operands, bool deferInstructions) { + LLVM_DEBUG(llvm::dbgs() << "[inst] processing instruction " + << spirv::stringifyOpcode(opcode) << "\n"); + + // First dispatch all the instructions whose opcode does not correspond to + // those that have a direct mirror in the SPIR-V dialect + switch (opcode) { + case spirv::Opcode::OpCapability: + return processCapability(operands); + case spirv::Opcode::OpExtension: + return processExtension(operands); + case spirv::Opcode::OpExtInst: + return processExtInst(operands); + case spirv::Opcode::OpExtInstImport: + return processExtInstImport(operands); + case spirv::Opcode::OpMemberName: + return processMemberName(operands); + case spirv::Opcode::OpMemoryModel: + return processMemoryModel(operands); + case spirv::Opcode::OpEntryPoint: + case spirv::Opcode::OpExecutionMode: + if (deferInstructions) { + deferredInstructions.emplace_back(opcode, operands); + return success(); + } + break; + case spirv::Opcode::OpVariable: + if (isa(opBuilder.getBlock()->getParentOp())) { + return processGlobalVariable(operands); + } + break; + case spirv::Opcode::OpLine: + return processDebugLine(operands); + case spirv::Opcode::OpNoLine: + return clearDebugLine(); + case spirv::Opcode::OpName: + return processName(operands); + case spirv::Opcode::OpString: + return processDebugString(operands); + case spirv::Opcode::OpModuleProcessed: + case spirv::Opcode::OpSource: + case spirv::Opcode::OpSourceContinued: + case 
spirv::Opcode::OpSourceExtension: + // TODO: This is debug information embedded in the binary which should be + // translated into the spv.module. + return success(); + case spirv::Opcode::OpTypeVoid: + case spirv::Opcode::OpTypeBool: + case spirv::Opcode::OpTypeInt: + case spirv::Opcode::OpTypeFloat: + case spirv::Opcode::OpTypeVector: + case spirv::Opcode::OpTypeMatrix: + case spirv::Opcode::OpTypeArray: + case spirv::Opcode::OpTypeFunction: + case spirv::Opcode::OpTypeRuntimeArray: + case spirv::Opcode::OpTypeStruct: + case spirv::Opcode::OpTypePointer: + case spirv::Opcode::OpTypeCooperativeMatrixNV: + return processType(opcode, operands); + case spirv::Opcode::OpTypeForwardPointer: + return processTypeForwardPointer(operands); + case spirv::Opcode::OpConstant: + return processConstant(operands, /*isSpec=*/false); + case spirv::Opcode::OpSpecConstant: + return processConstant(operands, /*isSpec=*/true); + case spirv::Opcode::OpConstantComposite: + return processConstantComposite(operands); + case spirv::Opcode::OpSpecConstantComposite: + return processSpecConstantComposite(operands); + case spirv::Opcode::OpSpecConstantOperation: + return processSpecConstantOperation(operands); + case spirv::Opcode::OpConstantTrue: + return processConstantBool(/*isTrue=*/true, operands, /*isSpec=*/false); + case spirv::Opcode::OpSpecConstantTrue: + return processConstantBool(/*isTrue=*/true, operands, /*isSpec=*/true); + case spirv::Opcode::OpConstantFalse: + return processConstantBool(/*isTrue=*/false, operands, /*isSpec=*/false); + case spirv::Opcode::OpSpecConstantFalse: + return processConstantBool(/*isTrue=*/false, operands, /*isSpec=*/true); + case spirv::Opcode::OpConstantNull: + return processConstantNull(operands); + case spirv::Opcode::OpDecorate: + return processDecoration(operands); + case spirv::Opcode::OpMemberDecorate: + return processMemberDecoration(operands); + case spirv::Opcode::OpFunction: + return processFunction(operands); + case spirv::Opcode::OpLabel: + 
return processLabel(operands); + case spirv::Opcode::OpBranch: + return processBranch(operands); + case spirv::Opcode::OpBranchConditional: + return processBranchConditional(operands); + case spirv::Opcode::OpSelectionMerge: + return processSelectionMerge(operands); + case spirv::Opcode::OpLoopMerge: + return processLoopMerge(operands); + case spirv::Opcode::OpPhi: + return processPhi(operands); + case spirv::Opcode::OpUndef: + return processUndef(operands); + default: + break; + } + return dispatchToAutogenDeserialization(opcode, operands); +} + +LogicalResult spirv::Deserializer::processOpWithoutGrammarAttr( + ArrayRef words, StringRef opName, bool hasResult, + unsigned numOperands) { + SmallVector resultTypes; + uint32_t valueID = 0; + + size_t wordIndex = 0; + if (hasResult) { + if (wordIndex >= words.size()) + return emitError(unknownLoc, + "expected result type while deserializing for ") + << opName; + + // Decode the type + auto type = getType(words[wordIndex]); + if (!type) + return emitError(unknownLoc, "unknown type result : ") + << words[wordIndex]; + resultTypes.push_back(type); + ++wordIndex; + + // Decode the result + if (wordIndex >= words.size()) + return emitError(unknownLoc, + "expected result while deserializing for ") + << opName; + valueID = words[wordIndex]; + ++wordIndex; + } + + SmallVector operands; + SmallVector attributes; + + // Decode operands + size_t operandIndex = 0; + for (; operandIndex < numOperands && wordIndex < words.size(); + ++operandIndex, ++wordIndex) { + auto arg = getValue(words[wordIndex]); + if (!arg) + return emitError(unknownLoc, "unknown result : ") << words[wordIndex]; + operands.push_back(arg); + } + if (operandIndex != numOperands) { + return emitError( + unknownLoc, + "found less operands than expected when deserializing for ") + << opName << "; only " << operandIndex << " of " << numOperands + << " processed"; + } + if (wordIndex != words.size()) { + return emitError( + unknownLoc, + "found more operands than 
expected when deserializing for ") + << opName << "; only " << wordIndex << " of " << words.size() + << " processed"; + } + + // Attach attributes from decorations + if (decorations.count(valueID)) { + auto attrs = decorations[valueID].getAttrs(); + attributes.append(attrs.begin(), attrs.end()); + } + + // Create the op and update bookkeeping maps + Location loc = createFileLineColLoc(opBuilder); + OperationState opState(loc, opName); + opState.addOperands(operands); + if (hasResult) + opState.addTypes(resultTypes); + opState.addAttributes(attributes); + Operation *op = opBuilder.createOperation(opState); + if (hasResult) + valueMap[valueID] = op->getResult(0); + + if (op->hasTrait()) + clearDebugLine(); + + return success(); +} + +LogicalResult spirv::Deserializer::processUndef(ArrayRef operands) { + if (operands.size() != 2) { + return emitError(unknownLoc, "OpUndef instruction must have two operands"); + } + auto type = getType(operands[0]); + if (!type) { + return emitError(unknownLoc, "unknown type with OpUndef instruction"); + } + undefMap[operands[1]] = type; + return success(); +} + +LogicalResult spirv::Deserializer::processExtInst(ArrayRef operands) { + if (operands.size() < 4) { + return emitError(unknownLoc, + "OpExtInst must have at least 4 operands, result type " + ", result , set and instruction opcode"); + } + if (!extendedInstSets.count(operands[2])) { + return emitError(unknownLoc, "undefined set in OpExtInst"); + } + SmallVector slicedOperands; + slicedOperands.append(operands.begin(), std::next(operands.begin(), 2)); + slicedOperands.append(std::next(operands.begin(), 4), operands.end()); + return dispatchToExtensionSetAutogenDeserialization( + extendedInstSets[operands[2]], operands[3], slicedOperands); +} + +namespace mlir { +namespace spirv { + +template <> +LogicalResult +Deserializer::processOp(ArrayRef words) { + unsigned wordIndex = 0; + if (wordIndex >= words.size()) { + return emitError(unknownLoc, + "missing Execution Model 
specification in OpEntryPoint"); + } + auto execModel = opBuilder.getI32IntegerAttr(words[wordIndex++]); + if (wordIndex >= words.size()) { + return emitError(unknownLoc, "missing in OpEntryPoint"); + } + // Get the function + auto fnID = words[wordIndex++]; + // Get the function name + auto fnName = decodeStringLiteral(words, wordIndex); + // Verify that the function matches the fnName + auto parsedFunc = getFunction(fnID); + if (!parsedFunc) { + return emitError(unknownLoc, "no function matching ") << fnID; + } + if (parsedFunc.getName() != fnName) { + return emitError(unknownLoc, "function name mismatch between OpEntryPoint " + "and OpFunction with ") + << fnID << ": " << fnName << " vs. " << parsedFunc.getName(); + } + SmallVector interface; + while (wordIndex < words.size()) { + auto arg = getGlobalVariable(words[wordIndex]); + if (!arg) { + return emitError(unknownLoc, "undefined result ") + << words[wordIndex] << " while decoding OpEntryPoint"; + } + interface.push_back(opBuilder.getSymbolRefAttr(arg.getOperation())); + wordIndex++; + } + opBuilder.create(unknownLoc, execModel, + opBuilder.getSymbolRefAttr(fnName), + opBuilder.getArrayAttr(interface)); + return success(); +} + +template <> +LogicalResult +Deserializer::processOp(ArrayRef words) { + unsigned wordIndex = 0; + if (wordIndex >= words.size()) { + return emitError(unknownLoc, + "missing function result in OpExecutionMode"); + } + // Get the function to get the name of the function + auto fnID = words[wordIndex++]; + auto fn = getFunction(fnID); + if (!fn) { + return emitError(unknownLoc, "no function matching ") << fnID; + } + // Get the Execution mode + if (wordIndex >= words.size()) { + return emitError(unknownLoc, "missing Execution Mode in OpExecutionMode"); + } + auto execMode = opBuilder.getI32IntegerAttr(words[wordIndex++]); + + // Get the values + SmallVector attrListElems; + while (wordIndex < words.size()) { + attrListElems.push_back(opBuilder.getI32IntegerAttr(words[wordIndex++])); + } 
+ auto values = opBuilder.getArrayAttr(attrListElems); + opBuilder.create( + unknownLoc, opBuilder.getSymbolRefAttr(fn.getName()), execMode, values); + return success(); +} + +template <> +LogicalResult +Deserializer::processOp(ArrayRef operands) { + if (operands.size() != 3) { + return emitError( + unknownLoc, + "OpControlBarrier must have execution scope , memory scope " + "and memory semantics "); + } + + SmallVector argAttrs; + for (auto operand : operands) { + auto argAttr = getConstantInt(operand); + if (!argAttr) { + return emitError(unknownLoc, + "expected 32-bit integer constant from ") + << operand << " for OpControlBarrier"; + } + argAttrs.push_back(argAttr); + } + + opBuilder.create(unknownLoc, argAttrs[0], + argAttrs[1], argAttrs[2]); + return success(); +} + +template <> +LogicalResult +Deserializer::processOp(ArrayRef operands) { + if (operands.size() < 3) { + return emitError(unknownLoc, + "OpFunctionCall must have at least 3 operands"); + } + + Type resultType = getType(operands[0]); + if (!resultType) { + return emitError(unknownLoc, "undefined result type from ") + << operands[0]; + } + + // Use null type to mean no result type. 
+ if (isVoidType(resultType)) + resultType = nullptr; + + auto resultID = operands[1]; + auto functionID = operands[2]; + + auto functionName = getFunctionSymbol(functionID); + + SmallVector arguments; + for (auto operand : llvm::drop_begin(operands, 3)) { + auto value = getValue(operand); + if (!value) { + return emitError(unknownLoc, "unknown ") + << operand << " used by OpFunctionCall"; + } + arguments.push_back(value); + } + + auto opFunctionCall = opBuilder.create( + unknownLoc, resultType, opBuilder.getSymbolRefAttr(functionName), + arguments); + + if (resultType) + valueMap[resultID] = opFunctionCall.getResult(0); + return success(); +} + +template <> +LogicalResult +Deserializer::processOp(ArrayRef operands) { + if (operands.size() != 2) { + return emitError(unknownLoc, "OpMemoryBarrier must have memory scope " + "and memory semantics "); + } + + SmallVector argAttrs; + for (auto operand : operands) { + auto argAttr = getConstantInt(operand); + if (!argAttr) { + return emitError(unknownLoc, + "expected 32-bit integer constant from ") + << operand << " for OpMemoryBarrier"; + } + argAttrs.push_back(argAttr); + } + + opBuilder.create(unknownLoc, argAttrs[0], + argAttrs[1]); + return success(); +} + +template <> +LogicalResult +Deserializer::processOp(ArrayRef words) { + SmallVector resultTypes; + size_t wordIndex = 0; + SmallVector operands; + SmallVector attributes; + + if (wordIndex < words.size()) { + auto arg = getValue(words[wordIndex]); + + if (!arg) { + return emitError(unknownLoc, "unknown result : ") + << words[wordIndex]; + } + + operands.push_back(arg); + wordIndex++; + } + + if (wordIndex < words.size()) { + auto arg = getValue(words[wordIndex]); + + if (!arg) { + return emitError(unknownLoc, "unknown result : ") + << words[wordIndex]; + } + + operands.push_back(arg); + wordIndex++; + } + + bool isAlignedAttr = false; + + if (wordIndex < words.size()) { + auto attrValue = words[wordIndex++]; + attributes.push_back(opBuilder.getNamedAttr( + 
"memory_access", opBuilder.getI32IntegerAttr(attrValue))); + isAlignedAttr = (attrValue == 2); + } + + if (isAlignedAttr && wordIndex < words.size()) { + attributes.push_back(opBuilder.getNamedAttr( + "alignment", opBuilder.getI32IntegerAttr(words[wordIndex++]))); + } + + if (wordIndex < words.size()) { + attributes.push_back(opBuilder.getNamedAttr( + "source_memory_access", + opBuilder.getI32IntegerAttr(words[wordIndex++]))); + } + + if (wordIndex < words.size()) { + attributes.push_back(opBuilder.getNamedAttr( + "source_alignment", opBuilder.getI32IntegerAttr(words[wordIndex++]))); + } + + if (wordIndex != words.size()) { + return emitError(unknownLoc, + "found more operands than expected when deserializing " + "spirv::CopyMemoryOp, only ") + << wordIndex << " of " << words.size() << " processed"; + } + + Location loc = createFileLineColLoc(opBuilder); + opBuilder.create(loc, resultTypes, operands, attributes); + + return success(); +} + +// Pull in auto-generated Deserializer::dispatchToAutogenDeserialization() and +// various Deserializer::processOp<...>() specializations. +#define GET_DESERIALIZATION_FNS +#include "mlir/Dialect/SPIRV/IR/SPIRVSerialization.inc" + +} // namespace spirv +} // namespace mlir diff --git a/mlir/lib/Target/SPIRV/Deserialization.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp similarity index 58% rename from mlir/lib/Target/SPIRV/Deserialization.cpp rename to mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp index 07eb3d35e0a484..5ce169a0d47f4b 100644 --- a/mlir/lib/Target/SPIRV/Deserialization.cpp +++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp @@ -1,4 +1,4 @@ -//===- Deserializer.cpp - MLIR SPIR-V Deserialization ---------------------===// +//===- Deserializer.cpp - MLIR SPIR-V Deserializer ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// // -// This file defines the SPIR-V binary to MLIR SPIR-V module deserialization. +// This file defines the SPIR-V binary to MLIR SPIR-V module deserializer. // //===----------------------------------------------------------------------===// -#include "mlir/Target/SPIRV/Deserialization.h" +#include "Deserializer.h" #include "mlir/Dialect/SPIRV/IR/SPIRVAttributes.h" #include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" @@ -24,7 +24,6 @@ #include "mlir/Target/SPIRV/SPIRVBinaryUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/bit.h" @@ -40,607 +39,22 @@ using namespace mlir; // Utility Functions //===----------------------------------------------------------------------===// -/// Decodes a string literal in `words` starting at `wordIndex`. Update the -/// latter to point to the position in words after the string literal. -static inline StringRef decodeStringLiteral(ArrayRef words, - unsigned &wordIndex) { - StringRef str(reinterpret_cast(words.data() + wordIndex)); - wordIndex += str.size() / 4 + 1; - return str; -} - -/// Extracts the opcode from the given first word of a SPIR-V instruction. -static inline spirv::Opcode extractOpcode(uint32_t word) { - return static_cast(word & 0xffff); -} - /// Returns true if the given `block` is a function entry block. static inline bool isFnEntryBlock(Block *block) { return block->isEntryBlock() && isa_and_nonnull(block->getParentOp()); } -namespace { -//===----------------------------------------------------------------------===// -// Utility Definitions -//===----------------------------------------------------------------------===// - -/// A struct for containing a header block's merge and continue targets. 
-/// -/// This struct is used to track original structured control flow info from -/// SPIR-V blob. This info will be used to create spv.selection/spv.loop -/// later. -struct BlockMergeInfo { - Block *mergeBlock; - Block *continueBlock; // nullptr for spv.selection - Location loc; - uint32_t control; - - BlockMergeInfo(Location location, uint32_t control) - : mergeBlock(nullptr), continueBlock(nullptr), loc(location), - control(control) {} - BlockMergeInfo(Location location, uint32_t control, Block *m, - Block *c = nullptr) - : mergeBlock(m), continueBlock(c), loc(location), control(control) {} -}; - -/// A struct for containing OpLine instruction information. -struct DebugLine { - uint32_t fileID; - uint32_t line; - uint32_t col; - - DebugLine(uint32_t fileIDNum, uint32_t lineNum, uint32_t colNum) - : fileID(fileIDNum), line(lineNum), col(colNum) {} -}; - -/// Map from a selection/loop's header block to its merge (and continue) target. -using BlockMergeInfoMap = DenseMap; - -/// A "deferred struct type" is a struct type with one or more member types not -/// known when the Deserializer first encounters the struct. This happens, for -/// example, with recursive structs where a pointer to the struct type is -/// forward declared through OpTypeForwardPointer in the SPIR-V module before -/// the struct declaration; the actual pointer to struct type should be defined -/// later through an OpTypePointer. For example, the following C struct: -/// -/// struct A { -/// A* next; -/// }; -/// -/// would be represented in the SPIR-V module as: -/// -/// OpName %A "A" -/// OpTypeForwardPointer %APtr Generic -/// %A = OpTypeStruct %APtr -/// %APtr = OpTypePointer Generic %A -/// -/// This means that the spirv::StructType cannot be fully constructed directly -/// when the Deserializer encounters it. Instead we create a -/// DeferredStructTypeInfo that contains all the information we know about the -/// spirv::StructType. 
Once all forward references for the struct are resolved, -/// the struct's body is set with all member info. -struct DeferredStructTypeInfo { - spirv::StructType deferredStructType; - - // A list of all unresolved member types for the struct. First element of each - // item is operand ID, second element is member index in the struct. - SmallVector, 0> unresolvedMemberTypes; - - // The list of member types. For unresolved members, this list contains - // place-holder empty types that will be updated later. - SmallVector memberTypes; - SmallVector offsetInfo; - SmallVector memberDecorationsInfo; -}; - -/// A struct that collects the info needed to materialize/emit a -/// SpecConstantOperation op. -struct SpecConstOperationMaterializationInfo { - spirv::Opcode enclodesOpcode; - uint32_t resultTypeID; - SmallVector enclosedOpOperands; -}; - -//===----------------------------------------------------------------------===// -// Deserializer Declaration -//===----------------------------------------------------------------------===// - -/// A SPIR-V module serializer. -/// -/// A SPIR-V binary module is a single linear stream of instructions; each -/// instruction is composed of 32-bit words. The first word of an instruction -/// records the total number of words of that instruction using the 16 -/// higher-order bits. So this deserializer uses that to get instruction -/// boundary and parse instructions and build a SPIR-V ModuleOp gradually. -/// -// TODO: clean up created ops on errors -class Deserializer { -public: - /// Creates a deserializer for the given SPIR-V `binary` module. - /// The SPIR-V ModuleOp will be created into `context. - explicit Deserializer(ArrayRef binary, MLIRContext *context); - - /// Deserializes the remembered SPIR-V binary module. - LogicalResult deserialize(); - - /// Collects the final SPIR-V ModuleOp. 
- spirv::OwningSPIRVModuleRef collect(); - -private: - //===--------------------------------------------------------------------===// - // Module structure - //===--------------------------------------------------------------------===// - - /// Initializes the `module` ModuleOp in this deserializer instance. - spirv::OwningSPIRVModuleRef createModuleOp(); - - /// Processes SPIR-V module header in `binary`. - LogicalResult processHeader(); - - /// Processes the SPIR-V OpCapability with `operands` and updates bookkeeping - /// in the deserializer. - LogicalResult processCapability(ArrayRef operands); - - /// Processes the SPIR-V OpExtension with `operands` and updates bookkeeping - /// in the deserializer. - LogicalResult processExtension(ArrayRef words); - - /// Processes the SPIR-V OpExtInstImport with `operands` and updates - /// bookkeeping in the deserializer. - LogicalResult processExtInstImport(ArrayRef words); - - /// Attaches (version, capabilities, extensions) triple to `module` as an - /// attribute. - void attachVCETriple(); - - /// Processes the SPIR-V OpMemoryModel with `operands` and updates `module`. - LogicalResult processMemoryModel(ArrayRef operands); - - /// Process SPIR-V OpName with `operands`. - LogicalResult processName(ArrayRef operands); - - /// Processes an OpDecorate instruction. - LogicalResult processDecoration(ArrayRef words); - - // Processes an OpMemberDecorate instruction. - LogicalResult processMemberDecoration(ArrayRef words); - - /// Processes an OpMemberName instruction. - LogicalResult processMemberName(ArrayRef words); - - /// Gets the function op associated with a result of OpFunction. - spirv::FuncOp getFunction(uint32_t id) { return funcMap.lookup(id); } - - /// Processes the SPIR-V function at the current `offset` into `binary`. - /// The operands to the OpFunction instruction is passed in as ``operands`. 
- /// This method processes each instruction inside the function and dispatches - /// them to their handler method accordingly. - LogicalResult processFunction(ArrayRef operands); - - /// Processes OpFunctionEnd and finalizes function. This wires up block - /// argument created from OpPhi instructions and also structurizes control - /// flow. - LogicalResult processFunctionEnd(ArrayRef operands); - - /// Gets the constant's attribute and type associated with the given . - Optional> getConstant(uint32_t id); - - /// Gets the info needed to materialize the spec constant operation op - /// associated with the given . - Optional - getSpecConstantOperation(uint32_t id); - - /// Gets the constant's integer attribute with the given . Returns a - /// null IntegerAttr if the given is not registered or does not correspond - /// to an integer constant. - IntegerAttr getConstantInt(uint32_t id); - - /// Returns a symbol to be used for the function name with the given - /// result . This tries to use the function's OpName if - /// exists; otherwise creates one based on the . - std::string getFunctionSymbol(uint32_t id); - - /// Returns a symbol to be used for the specialization constant with the given - /// result . This tries to use the specialization constant's OpName if - /// exists; otherwise creates one based on the . - std::string getSpecConstantSymbol(uint32_t id); - - /// Gets the specialization constant with the given result . - spirv::SpecConstantOp getSpecConstant(uint32_t id) { - return specConstMap.lookup(id); - } - - /// Gets the composite specialization constant with the given result . - spirv::SpecConstantCompositeOp getSpecConstantComposite(uint32_t id) { - return specConstCompositeMap.lookup(id); - } - - /// Creates a spirv::SpecConstantOp. - spirv::SpecConstantOp createSpecConstant(Location loc, uint32_t resultID, - Attribute defaultValue); - - /// Processes the OpVariable instructions at current `offset` into `binary`. 
- /// It is expected that this method is used for variables that are to be - /// defined at module scope and will be deserialized into a spv.globalVariable - /// instruction. - LogicalResult processGlobalVariable(ArrayRef operands); - - /// Gets the global variable associated with a result of OpVariable. - spirv::GlobalVariableOp getGlobalVariable(uint32_t id) { - return globalVariableMap.lookup(id); - } - - //===--------------------------------------------------------------------===// - // Type - //===--------------------------------------------------------------------===// - - /// Gets type for a given result . - Type getType(uint32_t id) { return typeMap.lookup(id); } - - /// Get the type associated with the result of an OpUndef. - Type getUndefType(uint32_t id) { return undefMap.lookup(id); } - - /// Returns true if the given `type` is for SPIR-V void type. - bool isVoidType(Type type) const { return type.isa(); } - - /// Processes a SPIR-V type instruction with given `opcode` and `operands` and - /// registers the type into `module`. - LogicalResult processType(spirv::Opcode opcode, ArrayRef operands); - - LogicalResult processOpTypePointer(ArrayRef operands); - - LogicalResult processArrayType(ArrayRef operands); - - LogicalResult processCooperativeMatrixType(ArrayRef operands); - - LogicalResult processFunctionType(ArrayRef operands); - - LogicalResult processRuntimeArrayType(ArrayRef operands); - - LogicalResult processStructType(ArrayRef operands); - - LogicalResult processMatrixType(ArrayRef operands); - - //===--------------------------------------------------------------------===// - // Constant - //===--------------------------------------------------------------------===// - - /// Processes a SPIR-V Op{|Spec}Constant instruction with the given - /// `operands`. `isSpec` indicates whether this is a specialization constant. 
- LogicalResult processConstant(ArrayRef operands, bool isSpec); - - /// Processes a SPIR-V Op{|Spec}Constant{True|False} instruction with the - /// given `operands`. `isSpec` indicates whether this is a specialization - /// constant. - LogicalResult processConstantBool(bool isTrue, ArrayRef operands, - bool isSpec); - - /// Processes a SPIR-V OpConstantComposite instruction with the given - /// `operands`. - LogicalResult processConstantComposite(ArrayRef operands); - - /// Processes a SPIR-V OpSpecConstantComposite instruction with the given - /// `operands`. - LogicalResult processSpecConstantComposite(ArrayRef operands); - - /// Processes a SPIR-V OpSpecConstantOperation instruction with the given - /// `operands`. - LogicalResult processSpecConstantOperation(ArrayRef operands); - - /// Materializes/emits an OpSpecConstantOperation instruction. - Value materializeSpecConstantOperation(uint32_t resultID, - spirv::Opcode enclosedOpcode, - uint32_t resultTypeID, - ArrayRef enclosedOpOperands); - - /// Processes a SPIR-V OpConstantNull instruction with the given `operands`. - LogicalResult processConstantNull(ArrayRef operands); - - //===--------------------------------------------------------------------===// - // Debug - //===--------------------------------------------------------------------===// - - /// Discontinues any source-level location information that might be active - /// from a previous OpLine instruction. - LogicalResult clearDebugLine(); - - /// Creates a FileLineColLoc with the OpLine location information. - Location createFileLineColLoc(OpBuilder opBuilder); - - /// Processes a SPIR-V OpLine instruction with the given `operands`. - LogicalResult processDebugLine(ArrayRef operands); - - /// Processes a SPIR-V OpString instruction with the given `operands`. 
- LogicalResult processDebugString(ArrayRef operands); - - //===--------------------------------------------------------------------===// - // Control flow - //===--------------------------------------------------------------------===// - - /// Returns the block for the given label . - Block *getBlock(uint32_t id) const { return blockMap.lookup(id); } - - // In SPIR-V, structured control flow is explicitly declared using merge - // instructions (OpSelectionMerge and OpLoopMerge). In the SPIR-V dialect, - // we use spv.selection and spv.loop to group structured control flow. - // The deserializer need to turn structured control flow marked with merge - // instructions into using spv.selection/spv.loop ops. - // - // Because structured control flow can nest and the basic block order have - // flexibility, we cannot isolate a structured selection/loop without - // deserializing all the blocks. So we use the following approach: - // - // 1. Deserialize all basic blocks in a function and create MLIR blocks for - // them into the function's region. In the meanwhile, keep a map between - // selection/loop header blocks to their corresponding merge (and continue) - // target blocks. - // 2. For each selection/loop header block, recursively get all basic blocks - // reachable (except the merge block) and put them in a newly created - // spv.selection/spv.loop's region. Structured control flow guarantees - // that we enter and exit in structured ways and the construct is nestable. - // 3. Put the new spv.selection/spv.loop op at the beginning of the old merge - // block and redirect all branches to the old header block to the old - // merge block (which contains the spv.selection/spv.loop op now). - - /// For OpPhi instructions, we use block arguments to represent them. OpPhi - /// encodes a list of (value, predecessor) pairs. 
At the time of handling the - /// block containing an OpPhi instruction, the predecessor block might not be - /// processed yet, also the value sent by it. So we need to defer handling - /// the block argument from the predecessors. We use the following approach: - /// - /// 1. For each OpPhi instruction, add a block argument to the current block - /// in construction. Record the block argument in `valueMap` so its uses - /// can be resolved. For the list of (value, predecessor) pairs, update - /// `blockPhiInfo` for bookkeeping. - /// 2. After processing all blocks, loop over `blockPhiInfo` to fix up each - /// block recorded there to create the proper block arguments on their - /// terminators. - - /// A data structure for containing a SPIR-V block's phi info. It will be - /// represented as block argument in SPIR-V dialect. - using BlockPhiInfo = - SmallVector; // The result of the values sent - - /// Gets or creates the block corresponding to the given label . The newly - /// created block will always be placed at the end of the current function. - Block *getOrCreateBlock(uint32_t id); - - LogicalResult processBranch(ArrayRef operands); - - LogicalResult processBranchConditional(ArrayRef operands); - - /// Processes a SPIR-V OpLabel instruction with the given `operands`. - LogicalResult processLabel(ArrayRef operands); - - /// Processes a SPIR-V OpSelectionMerge instruction with the given `operands`. - LogicalResult processSelectionMerge(ArrayRef operands); - - /// Processes a SPIR-V OpLoopMerge instruction with the given `operands`. - LogicalResult processLoopMerge(ArrayRef operands); - - /// Processes a SPIR-V OpPhi instruction with the given `operands`. - LogicalResult processPhi(ArrayRef operands); - - /// Creates block arguments on predecessors previously recorded when handling - /// OpPhi instructions. - LogicalResult wireUpBlockArgument(); - - /// Extracts blocks belonging to a structured selection/loop into a - /// spv.selection/spv.loop op. 
This method iterates until all blocks - /// declared as selection/loop headers are handled. - LogicalResult structurizeControlFlow(); - - //===--------------------------------------------------------------------===// - // Instruction - //===--------------------------------------------------------------------===// - - /// Get the Value associated with a result . - /// - /// This method materializes normal constants and inserts "casting" ops - /// (`spv.mlir.addressof` and `spv.mlir.referenceof`) to turn an symbol into a - /// SSA value for handling uses of module scope constants/variables in - /// functions. - Value getValue(uint32_t id); - - /// Slices the first instruction out of `binary` and returns its opcode and - /// operands via `opcode` and `operands` respectively. Returns failure if - /// there is no more remaining instructions (`expectedOpcode` will be used to - /// compose the error message) or the next instruction is malformed. - LogicalResult - sliceInstruction(spirv::Opcode &opcode, ArrayRef &operands, - Optional expectedOpcode = llvm::None); - - /// Processes a SPIR-V instruction with the given `opcode` and `operands`. - /// This method is the main entrance for handling SPIR-V instruction; it - /// checks the instruction opcode and dispatches to the corresponding handler. - /// Processing of Some instructions (like OpEntryPoint and OpExecutionMode) - /// might need to be deferred, since they contain forward references to s - /// in the deserialized binary, but module in SPIR-V dialect expects these to - /// be ssa-uses. - LogicalResult processInstruction(spirv::Opcode opcode, - ArrayRef operands, - bool deferInstructions = true); - - /// Processes a SPIR-V instruction from the given `operands`. It should - /// deserialize into an op with the given `opName` and `numOperands`. - /// This method is a generic one for dispatching any SPIR-V ops without - /// variadic operands and attributes in TableGen definitions. 
- LogicalResult processOpWithoutGrammarAttr(ArrayRef words, - StringRef opName, bool hasResult, - unsigned numOperands); - - /// Processes a OpUndef instruction. Adds a spv.Undef operation at the current - /// insertion point. - LogicalResult processUndef(ArrayRef operands); - - LogicalResult processTypeForwardPointer(ArrayRef operands); - - /// Method to dispatch to the specialized deserialization function for an - /// operation in SPIR-V dialect that is a mirror of an instruction in the - /// SPIR-V spec. This is auto-generated from ODS. Dispatch is handled for - /// all operations in SPIR-V dialect that have hasOpcode == 1. - LogicalResult dispatchToAutogenDeserialization(spirv::Opcode opcode, - ArrayRef words); - - /// Processes a SPIR-V OpExtInst with given `operands`. This slices the - /// entries of `operands` that specify the extended instruction set and - /// the instruction opcode. The op deserializer is then invoked using the - /// other entries. - LogicalResult processExtInst(ArrayRef operands); - - /// Dispatches the deserialization of extended instruction set operation based - /// on the extended instruction set name, and instruction opcode. This is - /// autogenerated from ODS. - LogicalResult - dispatchToExtensionSetAutogenDeserialization(StringRef extensionSetName, - uint32_t instructionID, - ArrayRef words); - - /// Method to deserialize an operation in the SPIR-V dialect that is a mirror - /// of an instruction in the SPIR-V spec. This is auto generated if hasOpcode - /// == 1 and autogenSerialization == 1 in ODS. - template - LogicalResult processOp(ArrayRef words) { - return emitError(unknownLoc, "unsupported deserialization for ") - << OpTy::getOperationName() << " op"; - } - -private: - /// The SPIR-V binary module. - ArrayRef binary; - - /// Contains the data of the OpLine instruction which precedes the current - /// processing instruction. - llvm::Optional debugLine; - - /// The current word offset into the binary module. 
- unsigned curOffset = 0; - - /// MLIRContext to create SPIR-V ModuleOp into. - MLIRContext *context; - - // TODO: create Location subclass for binary blob - Location unknownLoc; - - /// The SPIR-V ModuleOp. - spirv::OwningSPIRVModuleRef module; - - /// The current function under construction. - Optional curFunction; - - /// The current block under construction. - Block *curBlock = nullptr; - - OpBuilder opBuilder; - - spirv::Version version; - - /// The list of capabilities used by the module. - llvm::SmallSetVector capabilities; - - /// The list of extensions used by the module. - llvm::SmallSetVector extensions; - - // Result to type mapping. - DenseMap typeMap; - - // Result to constant attribute and type mapping. - /// - /// In the SPIR-V binary format, all constants are placed in the module and - /// shared by instructions at module level and in subsequent functions. But in - /// the SPIR-V dialect, we materialize the constant to where it's used in the - /// function. So when seeing a constant instruction in the binary format, we - /// don't immediately emit a constant op into the module, we keep its value - /// (and type) here. Later when it's used, we materialize the constant. - DenseMap> constantMap; - - // Result to spec constant mapping. - DenseMap specConstMap; - - // Result to composite spec constant mapping. - DenseMap specConstCompositeMap; - - /// Result to info needed to materialize an OpSpecConstantOperation - /// mapping. - DenseMap - specConstOperationMap; - - // Result to variable mapping. - DenseMap globalVariableMap; - - // Result to function mapping. - DenseMap funcMap; - - // Result to block mapping. - DenseMap blockMap; - - // Header block to its merge (and continue) target mapping. - BlockMergeInfoMap blockMergeInfo; - - // Block to its phi (block argument) mapping. - DenseMap blockPhiInfo; - - // Result to value mapping. - DenseMap valueMap; - - // Mapping from result to undef value of a type. 
- DenseMap undefMap; - - // Result to name mapping. - DenseMap nameMap; - - // Result to debug info mapping. - DenseMap debugInfoMap; - - // Result to decorations mapping. - DenseMap decorations; - - // Result to type decorations. - DenseMap typeDecorations; - - // Result to member decorations. - // decorated-struct-type- -> - // (struct-member-index -> (decoration -> decoration-operands)) - DenseMap>>> - memberDecorationMap; - - // Result to member name. - // struct-type- -> (struct-member-index -> name) - DenseMap> memberNameMap; - - // Result to extended instruction set name. - DenseMap extendedInstSets; - - // List of instructions that are processed in a deferred fashion (after an - // initial processing of the entire binary). Some operations like - // OpEntryPoint, and OpExecutionMode use forward references to function - // s. In SPIR-V dialect the corresponding operations (spv.EntryPoint and - // spv.ExecutionMode) need these references resolved. So these instructions - // are deserialized and stored for processing once the entire binary is - // processed. - SmallVector>, 4> - deferredInstructions; - - /// A list of IDs for all types forward-declared through OpTypeForwardPointer - /// instructions. - llvm::SetVector typeForwardPointerIDs; - - /// A list of all structs which have unresolved member types. 
- SmallVector deferredStructTypesInfos; -}; -} // namespace - //===----------------------------------------------------------------------===// // Deserializer Method Definitions //===----------------------------------------------------------------------===// -Deserializer::Deserializer(ArrayRef binary, MLIRContext *context) +spirv::Deserializer::Deserializer(ArrayRef binary, + MLIRContext *context) : binary(binary), context(context), unknownLoc(UnknownLoc::get(context)), module(createModuleOp()), opBuilder(module->body()) {} -LogicalResult Deserializer::deserialize() { +LogicalResult spirv::Deserializer::deserialize() { LLVM_DEBUG(llvm::dbgs() << "+++ starting deserialization +++\n"); if (failed(processHeader())) @@ -674,7 +88,7 @@ LogicalResult Deserializer::deserialize() { return success(); } -spirv::OwningSPIRVModuleRef Deserializer::collect() { +spirv::OwningSPIRVModuleRef spirv::Deserializer::collect() { return std::move(module); } @@ -682,14 +96,14 @@ spirv::OwningSPIRVModuleRef Deserializer::collect() { // Module structure //===----------------------------------------------------------------------===// -spirv::OwningSPIRVModuleRef Deserializer::createModuleOp() { +spirv::OwningSPIRVModuleRef spirv::Deserializer::createModuleOp() { OpBuilder builder(context); OperationState state(unknownLoc, spirv::ModuleOp::getOperationName()); spirv::ModuleOp::build(builder, state); return cast(Operation::create(state)); } -LogicalResult Deserializer::processHeader() { +LogicalResult spirv::Deserializer::processHeader() { if (binary.size() < spirv::kHeaderWordCount) return emitError(unknownLoc, "SPIR-V binary module must have a 5-word header"); @@ -728,7 +142,8 @@ LogicalResult Deserializer::processHeader() { return success(); } -LogicalResult Deserializer::processCapability(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processCapability(ArrayRef operands) { if (operands.size() != 1) return emitError(unknownLoc, "OpMemoryModel must have one parameter"); @@ 
-740,7 +155,7 @@ LogicalResult Deserializer::processCapability(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processExtension(ArrayRef words) { +LogicalResult spirv::Deserializer::processExtension(ArrayRef words) { if (words.empty()) { return emitError( unknownLoc, @@ -760,7 +175,8 @@ LogicalResult Deserializer::processExtension(ArrayRef words) { return success(); } -LogicalResult Deserializer::processExtInstImport(ArrayRef words) { +LogicalResult +spirv::Deserializer::processExtInstImport(ArrayRef words) { if (words.size() < 2) { return emitError(unknownLoc, "OpExtInstImport must have a result and a literal " @@ -776,14 +192,15 @@ LogicalResult Deserializer::processExtInstImport(ArrayRef words) { return success(); } -void Deserializer::attachVCETriple() { +void spirv::Deserializer::attachVCETriple() { (*module)->setAttr( spirv::ModuleOp::getVCETripleAttrName(), spirv::VerCapExtAttr::get(version, capabilities.getArrayRef(), extensions.getArrayRef(), context)); } -LogicalResult Deserializer::processMemoryModel(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processMemoryModel(ArrayRef operands) { if (operands.size() != 2) return emitError(unknownLoc, "OpMemoryModel must have two operands"); @@ -797,7 +214,7 @@ LogicalResult Deserializer::processMemoryModel(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processDecoration(ArrayRef words) { +LogicalResult spirv::Deserializer::processDecoration(ArrayRef words) { // TODO: This function should also be auto-generated. For now, since only a // few decorations are processed/handled in a meaningful manner, going with a // manual implementation. 
@@ -871,7 +288,8 @@ LogicalResult Deserializer::processDecoration(ArrayRef words) { return success(); } -LogicalResult Deserializer::processMemberDecoration(ArrayRef words) { +LogicalResult +spirv::Deserializer::processMemberDecoration(ArrayRef words) { // The binary layout of OpMemberDecorate is different comparing to OpDecorate if (words.size() < 3) { return emitError(unknownLoc, @@ -892,7 +310,7 @@ LogicalResult Deserializer::processMemberDecoration(ArrayRef words) { return success(); } -LogicalResult Deserializer::processMemberName(ArrayRef words) { +LogicalResult spirv::Deserializer::processMemberName(ArrayRef words) { if (words.size() < 3) { return emitError(unknownLoc, "OpMemberName must have at least 3 operands"); } @@ -906,7 +324,8 @@ LogicalResult Deserializer::processMemberName(ArrayRef words) { return success(); } -LogicalResult Deserializer::processFunction(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processFunction(ArrayRef operands) { if (curFunction) { return emitError(unknownLoc, "found function inside function"); } @@ -1043,7 +462,8 @@ LogicalResult Deserializer::processFunction(ArrayRef operands) { return processFunctionEnd(instOperands); } -LogicalResult Deserializer::processFunctionEnd(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processFunctionEnd(ArrayRef operands) { // Process OpFunctionEnd. 
if (!operands.empty()) { return emitError(unknownLoc, "unexpected operands for OpFunctionEnd"); @@ -1061,22 +481,23 @@ LogicalResult Deserializer::processFunctionEnd(ArrayRef operands) { return success(); } -Optional> Deserializer::getConstant(uint32_t id) { +Optional> +spirv::Deserializer::getConstant(uint32_t id) { auto constIt = constantMap.find(id); if (constIt == constantMap.end()) return llvm::None; return constIt->getSecond(); } -Optional -Deserializer::getSpecConstantOperation(uint32_t id) { +Optional +spirv::Deserializer::getSpecConstantOperation(uint32_t id) { auto constIt = specConstOperationMap.find(id); if (constIt == specConstOperationMap.end()) return llvm::None; return constIt->getSecond(); } -std::string Deserializer::getFunctionSymbol(uint32_t id) { +std::string spirv::Deserializer::getFunctionSymbol(uint32_t id) { auto funcName = nameMap.lookup(id).str(); if (funcName.empty()) { funcName = "spirv_fn_" + std::to_string(id); @@ -1084,7 +505,7 @@ std::string Deserializer::getFunctionSymbol(uint32_t id) { return funcName; } -std::string Deserializer::getSpecConstantSymbol(uint32_t id) { +std::string spirv::Deserializer::getSpecConstantSymbol(uint32_t id) { auto constName = nameMap.lookup(id).str(); if (constName.empty()) { constName = "spirv_spec_const_" + std::to_string(id); @@ -1092,9 +513,9 @@ std::string Deserializer::getSpecConstantSymbol(uint32_t id) { return constName; } -spirv::SpecConstantOp Deserializer::createSpecConstant(Location loc, - uint32_t resultID, - Attribute defaultValue) { +spirv::SpecConstantOp +spirv::Deserializer::createSpecConstant(Location loc, uint32_t resultID, + Attribute defaultValue) { auto symName = opBuilder.getStringAttr(getSpecConstantSymbol(resultID)); auto op = opBuilder.create(unknownLoc, symName, defaultValue); @@ -1106,7 +527,8 @@ spirv::SpecConstantOp Deserializer::createSpecConstant(Location loc, return op; } -LogicalResult Deserializer::processGlobalVariable(ArrayRef operands) { +LogicalResult 
+spirv::Deserializer::processGlobalVariable(ArrayRef operands) { unsigned wordIndex = 0; if (operands.size() < 3) { return emitError( @@ -1177,7 +599,7 @@ LogicalResult Deserializer::processGlobalVariable(ArrayRef operands) { return success(); } -IntegerAttr Deserializer::getConstantInt(uint32_t id) { +IntegerAttr spirv::Deserializer::getConstantInt(uint32_t id) { auto constInfo = getConstant(id); if (!constInfo) { return nullptr; @@ -1185,7 +607,7 @@ IntegerAttr Deserializer::getConstantInt(uint32_t id) { return constInfo->first.dyn_cast(); } -LogicalResult Deserializer::processName(ArrayRef operands) { +LogicalResult spirv::Deserializer::processName(ArrayRef operands) { if (operands.size() < 2) { return emitError(unknownLoc, "OpName needs at least 2 operands"); } @@ -1207,8 +629,8 @@ LogicalResult Deserializer::processName(ArrayRef operands) { // Type //===----------------------------------------------------------------------===// -LogicalResult Deserializer::processType(spirv::Opcode opcode, - ArrayRef operands) { +LogicalResult spirv::Deserializer::processType(spirv::Opcode opcode, + ArrayRef operands) { if (operands.empty()) { return emitError(unknownLoc, "type instruction with opcode ") << spirv::stringifyOpcode(opcode) << " needs at least one "; @@ -1303,7 +725,8 @@ LogicalResult Deserializer::processType(spirv::Opcode opcode, return success(); } -LogicalResult Deserializer::processOpTypePointer(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processOpTypePointer(ArrayRef operands) { if (operands.size() != 3) return emitError(unknownLoc, "OpTypePointer must have two parameters"); @@ -1356,7 +779,8 @@ LogicalResult Deserializer::processOpTypePointer(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processArrayType(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processArrayType(ArrayRef operands) { if (operands.size() != 3) { return emitError(unknownLoc, "OpTypeArray must have element type and count parameters"); @@ 
-1388,7 +812,8 @@ LogicalResult Deserializer::processArrayType(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processFunctionType(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processFunctionType(ArrayRef operands) { assert(!operands.empty() && "No operands for processing function type"); if (operands.size() == 1) { return emitError(unknownLoc, "missing return type for OpTypeFunction"); @@ -1414,7 +839,7 @@ LogicalResult Deserializer::processFunctionType(ArrayRef operands) { } LogicalResult -Deserializer::processCooperativeMatrixType(ArrayRef operands) { +spirv::Deserializer::processCooperativeMatrixType(ArrayRef operands) { if (operands.size() != 5) { return emitError(unknownLoc, "OpTypeCooperativeMatrix must have element " "type and row x column parameters"); @@ -1443,7 +868,7 @@ Deserializer::processCooperativeMatrixType(ArrayRef operands) { } LogicalResult -Deserializer::processRuntimeArrayType(ArrayRef operands) { +spirv::Deserializer::processRuntimeArrayType(ArrayRef operands) { if (operands.size() != 2) { return emitError(unknownLoc, "OpTypeRuntimeArray must have two operands"); } @@ -1458,7 +883,8 @@ Deserializer::processRuntimeArrayType(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processStructType(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processStructType(ArrayRef operands) { // TODO: Find a way to handle identified structs when debug info is stripped. 
if (operands.empty()) { @@ -1545,7 +971,8 @@ LogicalResult Deserializer::processStructType(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processMatrixType(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processMatrixType(ArrayRef operands) { if (operands.size() != 3) { // Three operands are needed: result_id, column_type, and column_count return emitError(unknownLoc, "OpTypeMatrix must have 3 operands" @@ -1564,12 +991,25 @@ LogicalResult Deserializer::processMatrixType(ArrayRef operands) { return success(); } +LogicalResult +spirv::Deserializer::processTypeForwardPointer(ArrayRef operands) { + if (operands.size() != 2) + return emitError(unknownLoc, + "OpTypeForwardPointer instruction must have two operands"); + + typeForwardPointerIDs.insert(operands[0]); + // TODO: Use the 2nd operand (Storage Class) to validate the OpTypePointer + // instruction that defines the actual type. + + return success(); +} + //===----------------------------------------------------------------------===// // Constant //===----------------------------------------------------------------------===// -LogicalResult Deserializer::processConstant(ArrayRef operands, - bool isSpec) { +LogicalResult spirv::Deserializer::processConstant(ArrayRef operands, + bool isSpec) { StringRef opname = isSpec ? "OpSpecConstant" : "OpConstant"; if (operands.size() < 2) { @@ -1682,9 +1122,8 @@ LogicalResult Deserializer::processConstant(ArrayRef operands, "scalar integer or floating-point type"); } -LogicalResult Deserializer::processConstantBool(bool isTrue, - ArrayRef operands, - bool isSpec) { +LogicalResult spirv::Deserializer::processConstantBool( + bool isTrue, ArrayRef operands, bool isSpec) { if (operands.size() != 2) { return emitError(unknownLoc, "Op") << (isSpec ? 
"Spec" : "") << "Constant" @@ -1706,7 +1145,7 @@ LogicalResult Deserializer::processConstantBool(bool isTrue, } LogicalResult -Deserializer::processConstantComposite(ArrayRef operands) { +spirv::Deserializer::processConstantComposite(ArrayRef operands) { if (operands.size() < 2) { return emitError(unknownLoc, "OpConstantComposite must have type and result "); @@ -1751,7 +1190,7 @@ Deserializer::processConstantComposite(ArrayRef operands) { } LogicalResult -Deserializer::processSpecConstantComposite(ArrayRef operands) { +spirv::Deserializer::processSpecConstantComposite(ArrayRef operands) { if (operands.size() < 2) { return emitError(unknownLoc, "OpConstantComposite must have type and result "); @@ -1786,7 +1225,7 @@ Deserializer::processSpecConstantComposite(ArrayRef operands) { } LogicalResult -Deserializer::processSpecConstantOperation(ArrayRef operands) { +spirv::Deserializer::processSpecConstantOperation(ArrayRef operands) { if (operands.size() < 3) return emitError(unknownLoc, "OpConstantOperation must have type , " "result , and operand opcode"); @@ -1812,7 +1251,7 @@ Deserializer::processSpecConstantOperation(ArrayRef operands) { return success(); } -Value Deserializer::materializeSpecConstantOperation( +Value spirv::Deserializer::materializeSpecConstantOperation( uint32_t resultID, spirv::Opcode enclosedOpcode, uint32_t resultTypeID, ArrayRef enclosedOpOperands) { @@ -1870,7 +1309,8 @@ Value Deserializer::materializeSpecConstantOperation( return specConstOperationOp.getResult(); } -LogicalResult Deserializer::processConstantNull(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processConstantNull(ArrayRef operands) { if (operands.size() != 2) { return emitError(unknownLoc, "OpConstantNull must have type and result "); @@ -1899,7 +1339,7 @@ LogicalResult Deserializer::processConstantNull(ArrayRef operands) { // Control flow //===----------------------------------------------------------------------===// -Block 
*Deserializer::getOrCreateBlock(uint32_t id) { +Block *spirv::Deserializer::getOrCreateBlock(uint32_t id) { if (auto *block = getBlock(id)) { LLVM_DEBUG(llvm::dbgs() << "[block] got exiting block for id = " << id << " @ " << block << "\n"); @@ -1915,7 +1355,7 @@ Block *Deserializer::getOrCreateBlock(uint32_t id) { return blockMap[id] = block; } -LogicalResult Deserializer::processBranch(ArrayRef operands) { +LogicalResult spirv::Deserializer::processBranch(ArrayRef operands) { if (!curBlock) { return emitError(unknownLoc, "OpBranch must appear inside a block"); } @@ -1936,7 +1376,7 @@ LogicalResult Deserializer::processBranch(ArrayRef operands) { } LogicalResult -Deserializer::processBranchConditional(ArrayRef operands) { +spirv::Deserializer::processBranchConditional(ArrayRef operands) { if (!curBlock) { return emitError(unknownLoc, "OpBranchConditional must appear inside a block"); @@ -1969,7 +1409,7 @@ Deserializer::processBranchConditional(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processLabel(ArrayRef operands) { +LogicalResult spirv::Deserializer::processLabel(ArrayRef operands) { if (!curFunction) { return emitError(unknownLoc, "OpLabel must appear inside a function"); } @@ -1991,7 +1431,8 @@ LogicalResult Deserializer::processLabel(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processSelectionMerge(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processSelectionMerge(ArrayRef operands) { if (!curBlock) { return emitError(unknownLoc, "OpSelectionMerge must appear in a block"); } @@ -2016,7 +1457,8 @@ LogicalResult Deserializer::processSelectionMerge(ArrayRef operands) { return success(); } -LogicalResult Deserializer::processLoopMerge(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processLoopMerge(ArrayRef operands) { if (!curBlock) { return emitError(unknownLoc, "OpLoopMerge must appear in a block"); } @@ -2042,7 +1484,7 @@ LogicalResult Deserializer::processLoopMerge(ArrayRef 
operands) { return success(); } -LogicalResult Deserializer::processPhi(ArrayRef operands) { +LogicalResult spirv::Deserializer::processPhi(ArrayRef operands) { if (!curBlock) { return emitError(unknownLoc, "OpPhi must appear in a block"); } @@ -2086,7 +1528,7 @@ class ControlFlowStructurizer { /// This method will also update `mergeInfo` by remapping all blocks inside to /// the newly cloned ones inside structured control flow op's regions. static LogicalResult structurize(Location loc, uint32_t control, - BlockMergeInfoMap &mergeInfo, + spirv::BlockMergeInfoMap &mergeInfo, Block *headerBlock, Block *mergeBlock, Block *continueBlock) { return ControlFlowStructurizer(loc, control, mergeInfo, headerBlock, @@ -2096,7 +1538,7 @@ class ControlFlowStructurizer { private: ControlFlowStructurizer(Location loc, uint32_t control, - BlockMergeInfoMap &mergeInfo, Block *header, + spirv::BlockMergeInfoMap &mergeInfo, Block *header, Block *merge, Block *cont) : location(loc), control(control), blockMergeInfo(mergeInfo), headerBlock(header), mergeBlock(merge), continueBlock(cont) {} @@ -2115,7 +1557,7 @@ class ControlFlowStructurizer { Location location; uint32_t control; - BlockMergeInfoMap &blockMergeInfo; + spirv::BlockMergeInfoMap &blockMergeInfo; Block *headerBlock; Block *mergeBlock; @@ -2339,7 +1781,7 @@ LogicalResult ControlFlowStructurizer::structurizeImpl() { return success(); } -LogicalResult Deserializer::wireUpBlockArgument() { +LogicalResult spirv::Deserializer::wireUpBlockArgument() { LLVM_DEBUG(llvm::dbgs() << "[phi] start wiring up block arguments\n"); OpBuilder::InsertionGuard guard(opBuilder); @@ -2388,7 +1830,7 @@ LogicalResult Deserializer::wireUpBlockArgument() { return success(); } -LogicalResult Deserializer::structurizeControlFlow() { +LogicalResult spirv::Deserializer::structurizeControlFlow() { LLVM_DEBUG(llvm::dbgs() << "[cf] start structurizing control flow\n"); while (!blockMergeInfo.empty()) { @@ -2428,7 +1870,7 @@ LogicalResult 
Deserializer::structurizeControlFlow() { // Debug //===----------------------------------------------------------------------===// -Location Deserializer::createFileLineColLoc(OpBuilder opBuilder) { +Location spirv::Deserializer::createFileLineColLoc(OpBuilder opBuilder) { if (!debugLine) return unknownLoc; @@ -2439,7 +1881,8 @@ Location Deserializer::createFileLineColLoc(OpBuilder opBuilder) { debugLine->line, debugLine->col); } -LogicalResult Deserializer::processDebugLine(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processDebugLine(ArrayRef operands) { // According to SPIR-V spec: // "This location information applies to the instructions physically // following this instruction, up to the first occurrence of any of the @@ -2451,12 +1894,13 @@ LogicalResult Deserializer::processDebugLine(ArrayRef operands) { return success(); } -LogicalResult Deserializer::clearDebugLine() { +LogicalResult spirv::Deserializer::clearDebugLine() { debugLine = llvm::None; return success(); } -LogicalResult Deserializer::processDebugString(ArrayRef operands) { +LogicalResult +spirv::Deserializer::processDebugString(ArrayRef operands) { if (operands.size() < 2) return emitError(unknownLoc, "OpString needs at least 2 operands"); @@ -2474,560 +1918,3 @@ LogicalResult Deserializer::processDebugString(ArrayRef operands) { debugInfoMap[operands[0]] = debugString; return success(); } - -//===----------------------------------------------------------------------===// -// Instruction -//===----------------------------------------------------------------------===// - -Value Deserializer::getValue(uint32_t id) { - if (auto constInfo = getConstant(id)) { - // Materialize a `spv.constant` op at every use site. 
- return opBuilder.create(unknownLoc, constInfo->second, - constInfo->first); - } - if (auto varOp = getGlobalVariable(id)) { - auto addressOfOp = opBuilder.create( - unknownLoc, varOp.type(), - opBuilder.getSymbolRefAttr(varOp.getOperation())); - return addressOfOp.pointer(); - } - if (auto constOp = getSpecConstant(id)) { - auto referenceOfOp = opBuilder.create( - unknownLoc, constOp.default_value().getType(), - opBuilder.getSymbolRefAttr(constOp.getOperation())); - return referenceOfOp.reference(); - } - if (auto constCompositeOp = getSpecConstantComposite(id)) { - auto referenceOfOp = opBuilder.create( - unknownLoc, constCompositeOp.type(), - opBuilder.getSymbolRefAttr(constCompositeOp.getOperation())); - return referenceOfOp.reference(); - } - if (auto specConstOperationInfo = getSpecConstantOperation(id)) { - return materializeSpecConstantOperation( - id, specConstOperationInfo->enclodesOpcode, - specConstOperationInfo->resultTypeID, - specConstOperationInfo->enclosedOpOperands); - } - if (auto undef = getUndefType(id)) { - return opBuilder.create(unknownLoc, undef); - } - return valueMap.lookup(id); -} - -LogicalResult -Deserializer::sliceInstruction(spirv::Opcode &opcode, - ArrayRef &operands, - Optional expectedOpcode) { - auto binarySize = binary.size(); - if (curOffset >= binarySize) { - return emitError(unknownLoc, "expected ") - << (expectedOpcode ? spirv::stringifyOpcode(*expectedOpcode) - : "more") - << " instruction"; - } - - // For each instruction, get its word count from the first word to slice it - // from the stream properly, and then dispatch to the instruction handler. 
- - uint32_t wordCount = binary[curOffset] >> 16; - - if (wordCount == 0) - return emitError(unknownLoc, "word count cannot be zero"); - - uint32_t nextOffset = curOffset + wordCount; - if (nextOffset > binarySize) - return emitError(unknownLoc, "insufficient words for the last instruction"); - - opcode = extractOpcode(binary[curOffset]); - operands = binary.slice(curOffset + 1, wordCount - 1); - curOffset = nextOffset; - return success(); -} - -LogicalResult Deserializer::processInstruction(spirv::Opcode opcode, - ArrayRef operands, - bool deferInstructions) { - LLVM_DEBUG(llvm::dbgs() << "[inst] processing instruction " - << spirv::stringifyOpcode(opcode) << "\n"); - - // First dispatch all the instructions whose opcode does not correspond to - // those that have a direct mirror in the SPIR-V dialect - switch (opcode) { - case spirv::Opcode::OpCapability: - return processCapability(operands); - case spirv::Opcode::OpExtension: - return processExtension(operands); - case spirv::Opcode::OpExtInst: - return processExtInst(operands); - case spirv::Opcode::OpExtInstImport: - return processExtInstImport(operands); - case spirv::Opcode::OpMemberName: - return processMemberName(operands); - case spirv::Opcode::OpMemoryModel: - return processMemoryModel(operands); - case spirv::Opcode::OpEntryPoint: - case spirv::Opcode::OpExecutionMode: - if (deferInstructions) { - deferredInstructions.emplace_back(opcode, operands); - return success(); - } - break; - case spirv::Opcode::OpVariable: - if (isa(opBuilder.getBlock()->getParentOp())) { - return processGlobalVariable(operands); - } - break; - case spirv::Opcode::OpLine: - return processDebugLine(operands); - case spirv::Opcode::OpNoLine: - return clearDebugLine(); - case spirv::Opcode::OpName: - return processName(operands); - case spirv::Opcode::OpString: - return processDebugString(operands); - case spirv::Opcode::OpModuleProcessed: - case spirv::Opcode::OpSource: - case spirv::Opcode::OpSourceContinued: - case 
spirv::Opcode::OpSourceExtension: - // TODO: This is debug information embedded in the binary which should be - // translated into the spv.module. - return success(); - case spirv::Opcode::OpTypeVoid: - case spirv::Opcode::OpTypeBool: - case spirv::Opcode::OpTypeInt: - case spirv::Opcode::OpTypeFloat: - case spirv::Opcode::OpTypeVector: - case spirv::Opcode::OpTypeMatrix: - case spirv::Opcode::OpTypeArray: - case spirv::Opcode::OpTypeFunction: - case spirv::Opcode::OpTypeRuntimeArray: - case spirv::Opcode::OpTypeStruct: - case spirv::Opcode::OpTypePointer: - case spirv::Opcode::OpTypeCooperativeMatrixNV: - return processType(opcode, operands); - case spirv::Opcode::OpConstant: - return processConstant(operands, /*isSpec=*/false); - case spirv::Opcode::OpSpecConstant: - return processConstant(operands, /*isSpec=*/true); - case spirv::Opcode::OpConstantComposite: - return processConstantComposite(operands); - case spirv::Opcode::OpSpecConstantComposite: - return processSpecConstantComposite(operands); - case spirv::Opcode::OpSpecConstantOperation: - return processSpecConstantOperation(operands); - case spirv::Opcode::OpConstantTrue: - return processConstantBool(/*isTrue=*/true, operands, /*isSpec=*/false); - case spirv::Opcode::OpSpecConstantTrue: - return processConstantBool(/*isTrue=*/true, operands, /*isSpec=*/true); - case spirv::Opcode::OpConstantFalse: - return processConstantBool(/*isTrue=*/false, operands, /*isSpec=*/false); - case spirv::Opcode::OpSpecConstantFalse: - return processConstantBool(/*isTrue=*/false, operands, /*isSpec=*/true); - case spirv::Opcode::OpConstantNull: - return processConstantNull(operands); - case spirv::Opcode::OpDecorate: - return processDecoration(operands); - case spirv::Opcode::OpMemberDecorate: - return processMemberDecoration(operands); - case spirv::Opcode::OpFunction: - return processFunction(operands); - case spirv::Opcode::OpLabel: - return processLabel(operands); - case spirv::Opcode::OpBranch: - return 
processBranch(operands); - case spirv::Opcode::OpBranchConditional: - return processBranchConditional(operands); - case spirv::Opcode::OpSelectionMerge: - return processSelectionMerge(operands); - case spirv::Opcode::OpLoopMerge: - return processLoopMerge(operands); - case spirv::Opcode::OpPhi: - return processPhi(operands); - case spirv::Opcode::OpUndef: - return processUndef(operands); - case spirv::Opcode::OpTypeForwardPointer: - return processTypeForwardPointer(operands); - default: - break; - } - return dispatchToAutogenDeserialization(opcode, operands); -} - -LogicalResult -Deserializer::processOpWithoutGrammarAttr(ArrayRef words, - StringRef opName, bool hasResult, - unsigned numOperands) { - SmallVector resultTypes; - uint32_t valueID = 0; - - size_t wordIndex= 0; - if (hasResult) { - if (wordIndex >= words.size()) - return emitError(unknownLoc, - "expected result type while deserializing for ") - << opName; - - // Decode the type - auto type = getType(words[wordIndex]); - if (!type) - return emitError(unknownLoc, "unknown type result : ") - << words[wordIndex]; - resultTypes.push_back(type); - ++wordIndex; - - // Decode the result - if (wordIndex >= words.size()) - return emitError(unknownLoc, - "expected result while deserializing for ") - << opName; - valueID = words[wordIndex]; - ++wordIndex; - } - - SmallVector operands; - SmallVector attributes; - - // Decode operands - size_t operandIndex = 0; - for (; operandIndex < numOperands && wordIndex < words.size(); - ++operandIndex, ++wordIndex) { - auto arg = getValue(words[wordIndex]); - if (!arg) - return emitError(unknownLoc, "unknown result : ") << words[wordIndex]; - operands.push_back(arg); - } - if (operandIndex != numOperands) { - return emitError( - unknownLoc, - "found less operands than expected when deserializing for ") - << opName << "; only " << operandIndex << " of " << numOperands - << " processed"; - } - if (wordIndex != words.size()) { - return emitError( - unknownLoc, - "found more 
operands than expected when deserializing for ") - << opName << "; only " << wordIndex << " of " << words.size() - << " processed"; - } - - // Attach attributes from decorations - if (decorations.count(valueID)) { - auto attrs = decorations[valueID].getAttrs(); - attributes.append(attrs.begin(), attrs.end()); - } - - // Create the op and update bookkeeping maps - Location loc = createFileLineColLoc(opBuilder); - OperationState opState(loc, opName); - opState.addOperands(operands); - if (hasResult) - opState.addTypes(resultTypes); - opState.addAttributes(attributes); - Operation *op = opBuilder.createOperation(opState); - if (hasResult) - valueMap[valueID] = op->getResult(0); - - if (op->hasTrait()) - clearDebugLine(); - - return success(); -} - -LogicalResult Deserializer::processUndef(ArrayRef operands) { - if (operands.size() != 2) { - return emitError(unknownLoc, "OpUndef instruction must have two operands"); - } - auto type = getType(operands[0]); - if (!type) { - return emitError(unknownLoc, "unknown type with OpUndef instruction"); - } - undefMap[operands[1]] = type; - return success(); -} - -LogicalResult -Deserializer::processTypeForwardPointer(ArrayRef operands) { - if (operands.size() != 2) - return emitError(unknownLoc, - "OpTypeForwardPointer instruction must have two operands"); - - typeForwardPointerIDs.insert(operands[0]); - // TODO: Use the 2nd operand (Storage Class) to validate the OpTypePointer - // instruction that defines the actual type. 
- - return success(); -} - -LogicalResult Deserializer::processExtInst(ArrayRef operands) { - if (operands.size() < 4) { - return emitError(unknownLoc, - "OpExtInst must have at least 4 operands, result type " - ", result , set and instruction opcode"); - } - if (!extendedInstSets.count(operands[2])) { - return emitError(unknownLoc, "undefined set in OpExtInst"); - } - SmallVector slicedOperands; - slicedOperands.append(operands.begin(), std::next(operands.begin(), 2)); - slicedOperands.append(std::next(operands.begin(), 4), operands.end()); - return dispatchToExtensionSetAutogenDeserialization( - extendedInstSets[operands[2]], operands[3], slicedOperands); -} - -namespace { - -template <> -LogicalResult -Deserializer::processOp(ArrayRef words) { - unsigned wordIndex = 0; - if (wordIndex >= words.size()) { - return emitError(unknownLoc, - "missing Execution Model specification in OpEntryPoint"); - } - auto execModel = opBuilder.getI32IntegerAttr(words[wordIndex++]); - if (wordIndex >= words.size()) { - return emitError(unknownLoc, "missing in OpEntryPoint"); - } - // Get the function - auto fnID = words[wordIndex++]; - // Get the function name - auto fnName = decodeStringLiteral(words, wordIndex); - // Verify that the function matches the fnName - auto parsedFunc = getFunction(fnID); - if (!parsedFunc) { - return emitError(unknownLoc, "no function matching ") << fnID; - } - if (parsedFunc.getName() != fnName) { - return emitError(unknownLoc, "function name mismatch between OpEntryPoint " - "and OpFunction with ") - << fnID << ": " << fnName << " vs. 
" << parsedFunc.getName(); - } - SmallVector interface; - while (wordIndex < words.size()) { - auto arg = getGlobalVariable(words[wordIndex]); - if (!arg) { - return emitError(unknownLoc, "undefined result ") - << words[wordIndex] << " while decoding OpEntryPoint"; - } - interface.push_back(opBuilder.getSymbolRefAttr(arg.getOperation())); - wordIndex++; - } - opBuilder.create(unknownLoc, execModel, - opBuilder.getSymbolRefAttr(fnName), - opBuilder.getArrayAttr(interface)); - return success(); -} - -template <> -LogicalResult -Deserializer::processOp(ArrayRef words) { - unsigned wordIndex = 0; - if (wordIndex >= words.size()) { - return emitError(unknownLoc, - "missing function result in OpExecutionMode"); - } - // Get the function to get the name of the function - auto fnID = words[wordIndex++]; - auto fn = getFunction(fnID); - if (!fn) { - return emitError(unknownLoc, "no function matching ") << fnID; - } - // Get the Execution mode - if (wordIndex >= words.size()) { - return emitError(unknownLoc, "missing Execution Mode in OpExecutionMode"); - } - auto execMode = opBuilder.getI32IntegerAttr(words[wordIndex++]); - - // Get the values - SmallVector attrListElems; - while (wordIndex < words.size()) { - attrListElems.push_back(opBuilder.getI32IntegerAttr(words[wordIndex++])); - } - auto values = opBuilder.getArrayAttr(attrListElems); - opBuilder.create( - unknownLoc, opBuilder.getSymbolRefAttr(fn.getName()), execMode, values); - return success(); -} - -template <> -LogicalResult -Deserializer::processOp(ArrayRef operands) { - if (operands.size() != 3) { - return emitError( - unknownLoc, - "OpControlBarrier must have execution scope , memory scope " - "and memory semantics "); - } - - SmallVector argAttrs; - for (auto operand : operands) { - auto argAttr = getConstantInt(operand); - if (!argAttr) { - return emitError(unknownLoc, - "expected 32-bit integer constant from ") - << operand << " for OpControlBarrier"; - } - argAttrs.push_back(argAttr); - } - - 
opBuilder.create(unknownLoc, argAttrs[0], - argAttrs[1], argAttrs[2]); - return success(); -} - -template <> -LogicalResult -Deserializer::processOp(ArrayRef operands) { - if (operands.size() < 3) { - return emitError(unknownLoc, - "OpFunctionCall must have at least 3 operands"); - } - - Type resultType = getType(operands[0]); - if (!resultType) { - return emitError(unknownLoc, "undefined result type from ") - << operands[0]; - } - - // Use null type to mean no result type. - if (isVoidType(resultType)) - resultType = nullptr; - - auto resultID = operands[1]; - auto functionID = operands[2]; - - auto functionName = getFunctionSymbol(functionID); - - SmallVector arguments; - for (auto operand : llvm::drop_begin(operands, 3)) { - auto value = getValue(operand); - if (!value) { - return emitError(unknownLoc, "unknown ") - << operand << " used by OpFunctionCall"; - } - arguments.push_back(value); - } - - auto opFunctionCall = opBuilder.create( - unknownLoc, resultType, opBuilder.getSymbolRefAttr(functionName), - arguments); - - if (resultType) - valueMap[resultID] = opFunctionCall.getResult(0); - return success(); -} - -template <> -LogicalResult -Deserializer::processOp(ArrayRef operands) { - if (operands.size() != 2) { - return emitError(unknownLoc, "OpMemoryBarrier must have memory scope " - "and memory semantics "); - } - - SmallVector argAttrs; - for (auto operand : operands) { - auto argAttr = getConstantInt(operand); - if (!argAttr) { - return emitError(unknownLoc, - "expected 32-bit integer constant from ") - << operand << " for OpMemoryBarrier"; - } - argAttrs.push_back(argAttr); - } - - opBuilder.create(unknownLoc, argAttrs[0], - argAttrs[1]); - return success(); -} - -template <> -LogicalResult -Deserializer::processOp(ArrayRef words) { - SmallVector resultTypes; - size_t wordIndex = 0; - SmallVector operands; - SmallVector attributes; - - if (wordIndex < words.size()) { - auto arg = getValue(words[wordIndex]); - - if (!arg) { - return emitError(unknownLoc, 
"unknown result : ") - << words[wordIndex]; - } - - operands.push_back(arg); - wordIndex++; - } - - if (wordIndex < words.size()) { - auto arg = getValue(words[wordIndex]); - - if (!arg) { - return emitError(unknownLoc, "unknown result : ") - << words[wordIndex]; - } - - operands.push_back(arg); - wordIndex++; - } - - bool isAlignedAttr = false; - - if (wordIndex < words.size()) { - auto attrValue = words[wordIndex++]; - attributes.push_back(opBuilder.getNamedAttr( - "memory_access", opBuilder.getI32IntegerAttr(attrValue))); - isAlignedAttr = (attrValue == 2); - } - - if (isAlignedAttr && wordIndex < words.size()) { - attributes.push_back(opBuilder.getNamedAttr( - "alignment", opBuilder.getI32IntegerAttr(words[wordIndex++]))); - } - - if (wordIndex < words.size()) { - attributes.push_back(opBuilder.getNamedAttr( - "source_memory_access", - opBuilder.getI32IntegerAttr(words[wordIndex++]))); - } - - if (wordIndex < words.size()) { - attributes.push_back(opBuilder.getNamedAttr( - "source_alignment", opBuilder.getI32IntegerAttr(words[wordIndex++]))); - } - - if (wordIndex != words.size()) { - return emitError(unknownLoc, - "found more operands than expected when deserializing " - "spirv::CopyMemoryOp, only ") - << wordIndex << " of " << words.size() << " processed"; - } - - Location loc = createFileLineColLoc(opBuilder); - opBuilder.create(loc, resultTypes, operands, attributes); - - return success(); -} - -// Pull in auto-generated Deserializer::dispatchToAutogenDeserialization() and -// various Deserializer::processOp<...>() specializations. 
-#define GET_DESERIALIZATION_FNS -#include "mlir/Dialect/SPIRV/IR/SPIRVSerialization.inc" - -} // namespace - -namespace mlir { -spirv::OwningSPIRVModuleRef spirv::deserialize(ArrayRef binary, - MLIRContext *context) { - Deserializer deserializer(binary, context); - - if (failed(deserializer.deserialize())) - return nullptr; - - return deserializer.collect(); -} -} // namespace mlir diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h new file mode 100644 index 00000000000000..826441da1dc00e --- /dev/null +++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h @@ -0,0 +1,613 @@ +//===- Deserializer.h - MLIR SPIR-V Deserializer ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the SPIR-V binary to MLIR SPIR-V module deserializer. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_SPIRV_DESERIALIZER_H +#define MLIR_TARGET_SPIRV_DESERIALIZER_H + +#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" +#include "mlir/Dialect/SPIRV/IR/SPIRVModule.h" +#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" +#include "mlir/IR/Builders.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringRef.h" +#include + +//===----------------------------------------------------------------------===// +// Utility Functions +//===----------------------------------------------------------------------===// + +/// Decodes a string literal in `words` starting at `wordIndex`. Update the +/// latter to point to the position in words after the string literal. 
+static inline llvm::StringRef +decodeStringLiteral(llvm::ArrayRef words, unsigned &wordIndex) { + llvm::StringRef str(reinterpret_cast(words.data() + wordIndex)); + wordIndex += str.size() / 4 + 1; + return str; +} + +namespace mlir { +namespace spirv { + +//===----------------------------------------------------------------------===// +// Utility Definitions +//===----------------------------------------------------------------------===// + +/// A struct for containing a header block's merge and continue targets. +/// +/// This struct is used to track original structured control flow info from +/// SPIR-V blob. This info will be used to create spv.selection/spv.loop +/// later. +struct BlockMergeInfo { + Block *mergeBlock; + Block *continueBlock; // nullptr for spv.selection + Location loc; + uint32_t control; + + BlockMergeInfo(Location location, uint32_t control) + : mergeBlock(nullptr), continueBlock(nullptr), loc(location), + control(control) {} + BlockMergeInfo(Location location, uint32_t control, Block *m, + Block *c = nullptr) + : mergeBlock(m), continueBlock(c), loc(location), control(control) {} +}; + +/// A struct for containing OpLine instruction information. +struct DebugLine { + uint32_t fileID; + uint32_t line; + uint32_t col; + + DebugLine(uint32_t fileIDNum, uint32_t lineNum, uint32_t colNum) + : fileID(fileIDNum), line(lineNum), col(colNum) {} +}; + +/// Map from a selection/loop's header block to its merge (and continue) target. +using BlockMergeInfoMap = DenseMap; + +/// A "deferred struct type" is a struct type with one or more member types not +/// known when the Deserializer first encounters the struct. This happens, for +/// example, with recursive structs where a pointer to the struct type is +/// forward declared through OpTypeForwardPointer in the SPIR-V module before +/// the struct declaration; the actual pointer to struct type should be defined +/// later through an OpTypePointer. 
For example, the following C struct: +/// +/// struct A { +/// A* next; +/// }; +/// +/// would be represented in the SPIR-V module as: +/// +/// OpName %A "A" +/// OpTypeForwardPointer %APtr Generic +/// %A = OpTypeStruct %APtr +/// %APtr = OpTypePointer Generic %A +/// +/// This means that the spirv::StructType cannot be fully constructed directly +/// when the Deserializer encounters it. Instead we create a +/// DeferredStructTypeInfo that contains all the information we know about the +/// spirv::StructType. Once all forward references for the struct are resolved, +/// the struct's body is set with all member info. +struct DeferredStructTypeInfo { + spirv::StructType deferredStructType; + + // A list of all unresolved member types for the struct. First element of each + // item is operand ID, second element is member index in the struct. + SmallVector, 0> unresolvedMemberTypes; + + // The list of member types. For unresolved members, this list contains + // place-holder empty types that will be updated later. + SmallVector memberTypes; + SmallVector offsetInfo; + SmallVector memberDecorationsInfo; +}; + +/// A struct that collects the info needed to materialize/emit a +/// SpecConstantOperation op. +struct SpecConstOperationMaterializationInfo { + spirv::Opcode enclodesOpcode; + uint32_t resultTypeID; + SmallVector enclosedOpOperands; +}; + +//===----------------------------------------------------------------------===// +// Deserializer Declaration +//===----------------------------------------------------------------------===// + +/// A SPIR-V module deserializer. +/// +/// A SPIR-V binary module is a single linear stream of instructions; each +/// instruction is composed of 32-bit words. The first word of an instruction +/// records the total number of words of that instruction using the 16 +/// higher-order bits. So this deserializer uses that to get instruction +/// boundary and parse instructions and build a SPIR-V ModuleOp gradually. 
+/// +// TODO: clean up created ops on errors +class Deserializer { +public: + /// Creates a deserializer for the given SPIR-V `binary` module. + /// The SPIR-V ModuleOp will be created into `context. + explicit Deserializer(ArrayRef binary, MLIRContext *context); + + /// Deserializes the remembered SPIR-V binary module. + LogicalResult deserialize(); + + /// Collects the final SPIR-V ModuleOp. + spirv::OwningSPIRVModuleRef collect(); + +private: + //===--------------------------------------------------------------------===// + // Module structure + //===--------------------------------------------------------------------===// + + /// Initializes the `module` ModuleOp in this deserializer instance. + spirv::OwningSPIRVModuleRef createModuleOp(); + + /// Processes SPIR-V module header in `binary`. + LogicalResult processHeader(); + + /// Processes the SPIR-V OpCapability with `operands` and updates bookkeeping + /// in the deserializer. + LogicalResult processCapability(ArrayRef operands); + + /// Processes the SPIR-V OpExtension with `operands` and updates bookkeeping + /// in the deserializer. + LogicalResult processExtension(ArrayRef words); + + /// Processes the SPIR-V OpExtInstImport with `operands` and updates + /// bookkeeping in the deserializer. + LogicalResult processExtInstImport(ArrayRef words); + + /// Attaches (version, capabilities, extensions) triple to `module` as an + /// attribute. + void attachVCETriple(); + + /// Processes the SPIR-V OpMemoryModel with `operands` and updates `module`. + LogicalResult processMemoryModel(ArrayRef operands); + + /// Process SPIR-V OpName with `operands`. + LogicalResult processName(ArrayRef operands); + + /// Processes an OpDecorate instruction. + LogicalResult processDecoration(ArrayRef words); + + // Processes an OpMemberDecorate instruction. + LogicalResult processMemberDecoration(ArrayRef words); + + /// Processes an OpMemberName instruction. 
+ LogicalResult processMemberName(ArrayRef words); + + /// Gets the function op associated with a result of OpFunction. + spirv::FuncOp getFunction(uint32_t id) { return funcMap.lookup(id); } + + /// Processes the SPIR-V function at the current `offset` into `binary`. + /// The operands to the OpFunction instruction is passed in as ``operands`. + /// This method processes each instruction inside the function and dispatches + /// them to their handler method accordingly. + LogicalResult processFunction(ArrayRef operands); + + /// Processes OpFunctionEnd and finalizes function. This wires up block + /// argument created from OpPhi instructions and also structurizes control + /// flow. + LogicalResult processFunctionEnd(ArrayRef operands); + + /// Gets the constant's attribute and type associated with the given . + Optional> getConstant(uint32_t id); + + /// Gets the info needed to materialize the spec constant operation op + /// associated with the given . + Optional + getSpecConstantOperation(uint32_t id); + + /// Gets the constant's integer attribute with the given . Returns a + /// null IntegerAttr if the given is not registered or does not correspond + /// to an integer constant. + IntegerAttr getConstantInt(uint32_t id); + + /// Returns a symbol to be used for the function name with the given + /// result . This tries to use the function's OpName if + /// exists; otherwise creates one based on the . + std::string getFunctionSymbol(uint32_t id); + + /// Returns a symbol to be used for the specialization constant with the given + /// result . This tries to use the specialization constant's OpName if + /// exists; otherwise creates one based on the . + std::string getSpecConstantSymbol(uint32_t id); + + /// Gets the specialization constant with the given result . + spirv::SpecConstantOp getSpecConstant(uint32_t id) { + return specConstMap.lookup(id); + } + + /// Gets the composite specialization constant with the given result . 
+ spirv::SpecConstantCompositeOp getSpecConstantComposite(uint32_t id) { + return specConstCompositeMap.lookup(id); + } + + /// Creates a spirv::SpecConstantOp. + spirv::SpecConstantOp createSpecConstant(Location loc, uint32_t resultID, + Attribute defaultValue); + + /// Processes the OpVariable instructions at current `offset` into `binary`. + /// It is expected that this method is used for variables that are to be + /// defined at module scope and will be deserialized into a spv.globalVariable + /// instruction. + LogicalResult processGlobalVariable(ArrayRef operands); + + /// Gets the global variable associated with a result of OpVariable. + spirv::GlobalVariableOp getGlobalVariable(uint32_t id) { + return globalVariableMap.lookup(id); + } + + //===--------------------------------------------------------------------===// + // Type + //===--------------------------------------------------------------------===// + + /// Gets type for a given result . + Type getType(uint32_t id) { return typeMap.lookup(id); } + + /// Get the type associated with the result of an OpUndef. + Type getUndefType(uint32_t id) { return undefMap.lookup(id); } + + /// Returns true if the given `type` is for SPIR-V void type. + bool isVoidType(Type type) const { return type.isa(); } + + /// Processes a SPIR-V type instruction with given `opcode` and `operands` and + /// registers the type into `module`. 
+ LogicalResult processType(spirv::Opcode opcode, ArrayRef operands); + + LogicalResult processOpTypePointer(ArrayRef operands); + + LogicalResult processArrayType(ArrayRef operands); + + LogicalResult processCooperativeMatrixType(ArrayRef operands); + + LogicalResult processFunctionType(ArrayRef operands); + + LogicalResult processRuntimeArrayType(ArrayRef operands); + + LogicalResult processStructType(ArrayRef operands); + + LogicalResult processMatrixType(ArrayRef operands); + + LogicalResult processTypeForwardPointer(ArrayRef operands); + + //===--------------------------------------------------------------------===// + // Constant + //===--------------------------------------------------------------------===// + + /// Processes a SPIR-V Op{|Spec}Constant instruction with the given + /// `operands`. `isSpec` indicates whether this is a specialization constant. + LogicalResult processConstant(ArrayRef operands, bool isSpec); + + /// Processes a SPIR-V Op{|Spec}Constant{True|False} instruction with the + /// given `operands`. `isSpec` indicates whether this is a specialization + /// constant. + LogicalResult processConstantBool(bool isTrue, ArrayRef operands, + bool isSpec); + + /// Processes a SPIR-V OpConstantComposite instruction with the given + /// `operands`. + LogicalResult processConstantComposite(ArrayRef operands); + + /// Processes a SPIR-V OpSpecConstantComposite instruction with the given + /// `operands`. + LogicalResult processSpecConstantComposite(ArrayRef operands); + + /// Processes a SPIR-V OpSpecConstantOperation instruction with the given + /// `operands`. + LogicalResult processSpecConstantOperation(ArrayRef operands); + + /// Materializes/emits an OpSpecConstantOperation instruction. + Value materializeSpecConstantOperation(uint32_t resultID, + spirv::Opcode enclosedOpcode, + uint32_t resultTypeID, + ArrayRef enclosedOpOperands); + + /// Processes a SPIR-V OpConstantNull instruction with the given `operands`. 
+ LogicalResult processConstantNull(ArrayRef operands); + + //===--------------------------------------------------------------------===// + // Debug + //===--------------------------------------------------------------------===// + + /// Discontinues any source-level location information that might be active + /// from a previous OpLine instruction. + LogicalResult clearDebugLine(); + + /// Creates a FileLineColLoc with the OpLine location information. + Location createFileLineColLoc(OpBuilder opBuilder); + + /// Processes a SPIR-V OpLine instruction with the given `operands`. + LogicalResult processDebugLine(ArrayRef operands); + + /// Processes a SPIR-V OpString instruction with the given `operands`. + LogicalResult processDebugString(ArrayRef operands); + + //===--------------------------------------------------------------------===// + // Control flow + //===--------------------------------------------------------------------===// + + /// Returns the block for the given label . + Block *getBlock(uint32_t id) const { return blockMap.lookup(id); } + + // In SPIR-V, structured control flow is explicitly declared using merge + // instructions (OpSelectionMerge and OpLoopMerge). In the SPIR-V dialect, + // we use spv.selection and spv.loop to group structured control flow. + // The deserializer need to turn structured control flow marked with merge + // instructions into using spv.selection/spv.loop ops. + // + // Because structured control flow can nest and the basic block order have + // flexibility, we cannot isolate a structured selection/loop without + // deserializing all the blocks. So we use the following approach: + // + // 1. Deserialize all basic blocks in a function and create MLIR blocks for + // them into the function's region. In the meanwhile, keep a map between + // selection/loop header blocks to their corresponding merge (and continue) + // target blocks. + // 2. 
For each selection/loop header block, recursively get all basic blocks + // reachable (except the merge block) and put them in a newly created + // spv.selection/spv.loop's region. Structured control flow guarantees + // that we enter and exit in structured ways and the construct is nestable. + // 3. Put the new spv.selection/spv.loop op at the beginning of the old merge + // block and redirect all branches to the old header block to the old + // merge block (which contains the spv.selection/spv.loop op now). + + /// For OpPhi instructions, we use block arguments to represent them. OpPhi + /// encodes a list of (value, predecessor) pairs. At the time of handling the + /// block containing an OpPhi instruction, the predecessor block might not be + /// processed yet, also the value sent by it. So we need to defer handling + /// the block argument from the predecessors. We use the following approach: + /// + /// 1. For each OpPhi instruction, add a block argument to the current block + /// in construction. Record the block argument in `valueMap` so its uses + /// can be resolved. For the list of (value, predecessor) pairs, update + /// `blockPhiInfo` for bookkeeping. + /// 2. After processing all blocks, loop over `blockPhiInfo` to fix up each + /// block recorded there to create the proper block arguments on their + /// terminators. + + /// A data structure for containing a SPIR-V block's phi info. It will be + /// represented as block argument in SPIR-V dialect. + using BlockPhiInfo = + SmallVector; // The result of the values sent + + /// Gets or creates the block corresponding to the given label . The newly + /// created block will always be placed at the end of the current function. + Block *getOrCreateBlock(uint32_t id); + + LogicalResult processBranch(ArrayRef operands); + + LogicalResult processBranchConditional(ArrayRef operands); + + /// Processes a SPIR-V OpLabel instruction with the given `operands`. 
+ LogicalResult processLabel(ArrayRef operands); + + /// Processes a SPIR-V OpSelectionMerge instruction with the given `operands`. + LogicalResult processSelectionMerge(ArrayRef operands); + + /// Processes a SPIR-V OpLoopMerge instruction with the given `operands`. + LogicalResult processLoopMerge(ArrayRef operands); + + /// Processes a SPIR-V OpPhi instruction with the given `operands`. + LogicalResult processPhi(ArrayRef operands); + + /// Creates block arguments on predecessors previously recorded when handling + /// OpPhi instructions. + LogicalResult wireUpBlockArgument(); + + /// Extracts blocks belonging to a structured selection/loop into a + /// spv.selection/spv.loop op. This method iterates until all blocks + /// declared as selection/loop headers are handled. + LogicalResult structurizeControlFlow(); + + //===--------------------------------------------------------------------===// + // Instruction + //===--------------------------------------------------------------------===// + + /// Get the Value associated with a result . + /// + /// This method materializes normal constants and inserts "casting" ops + /// (`spv.mlir.addressof` and `spv.mlir.referenceof`) to turn a symbol into an + /// SSA value for handling uses of module scope constants/variables in + /// functions. + Value getValue(uint32_t id); + + /// Slices the first instruction out of `binary` and returns its opcode and + /// operands via `opcode` and `operands` respectively. Returns failure if + /// there are no more remaining instructions (`expectedOpcode` will be used to + /// compose the error message) or the next instruction is malformed. + LogicalResult + sliceInstruction(spirv::Opcode &opcode, ArrayRef &operands, + Optional expectedOpcode = llvm::None); + + /// Processes a SPIR-V instruction with the given `opcode` and `operands`. + /// This method is the main entrance for handling SPIR-V instruction; it + /// checks the instruction opcode and dispatches to the corresponding handler. 
+ /// Processing of some instructions (like OpEntryPoint and OpExecutionMode) + /// might need to be deferred, since they contain forward references to s + /// in the deserialized binary, but module in SPIR-V dialect expects these to + /// be ssa-uses. + LogicalResult processInstruction(spirv::Opcode opcode, + ArrayRef operands, + bool deferInstructions = true); + + /// Processes a SPIR-V instruction from the given `operands`. It should + /// deserialize into an op with the given `opName` and `numOperands`. + /// This method is a generic one for dispatching any SPIR-V ops without + /// variadic operands and attributes in TableGen definitions. + LogicalResult processOpWithoutGrammarAttr(ArrayRef words, + StringRef opName, bool hasResult, + unsigned numOperands); + + /// Processes an OpUndef instruction. Adds a spv.Undef operation at the current + /// insertion point. + LogicalResult processUndef(ArrayRef operands); + + /// Method to dispatch to the specialized deserialization function for an + /// operation in SPIR-V dialect that is a mirror of an instruction in the + /// SPIR-V spec. This is auto-generated from ODS. Dispatch is handled for + /// all operations in SPIR-V dialect that have hasOpcode == 1. + LogicalResult dispatchToAutogenDeserialization(spirv::Opcode opcode, + ArrayRef words); + + /// Processes a SPIR-V OpExtInst with given `operands`. This slices the + /// entries of `operands` that specify the extended instruction set and + /// the instruction opcode. The op deserializer is then invoked using the + /// other entries. + LogicalResult processExtInst(ArrayRef operands); + + /// Dispatches the deserialization of extended instruction set operation based + /// on the extended instruction set name, and instruction opcode. This is + /// autogenerated from ODS. 
+ LogicalResult + dispatchToExtensionSetAutogenDeserialization(StringRef extensionSetName, + uint32_t instructionID, + ArrayRef words); + + /// Method to deserialize an operation in the SPIR-V dialect that is a mirror + /// of an instruction in the SPIR-V spec. This is auto generated if hasOpcode + /// == 1 and autogenSerialization == 1 in ODS. + template LogicalResult processOp(ArrayRef words) { + return emitError(unknownLoc, "unsupported deserialization for ") + << OpTy::getOperationName() << " op"; + } + +private: + /// The SPIR-V binary module. + ArrayRef binary; + + /// Contains the data of the OpLine instruction which precedes the current + /// processing instruction. + llvm::Optional debugLine; + + /// The current word offset into the binary module. + unsigned curOffset = 0; + + /// MLIRContext to create SPIR-V ModuleOp into. + MLIRContext *context; + + // TODO: create Location subclass for binary blob + Location unknownLoc; + + /// The SPIR-V ModuleOp. + spirv::OwningSPIRVModuleRef module; + + /// The current function under construction. + Optional curFunction; + + /// The current block under construction. + Block *curBlock = nullptr; + + OpBuilder opBuilder; + + spirv::Version version; + + /// The list of capabilities used by the module. + llvm::SmallSetVector capabilities; + + /// The list of extensions used by the module. + llvm::SmallSetVector extensions; + + // Result to type mapping. + DenseMap typeMap; + + // Result to constant attribute and type mapping. + /// + /// In the SPIR-V binary format, all constants are placed in the module and + /// shared by instructions at module level and in subsequent functions. But in + /// the SPIR-V dialect, we materialize the constant to where it's used in the + /// function. So when seeing a constant instruction in the binary format, we + /// don't immediately emit a constant op into the module, we keep its value + /// (and type) here. Later when it's used, we materialize the constant. 
+ DenseMap> constantMap; + + // Result to spec constant mapping. + DenseMap specConstMap; + + // Result to composite spec constant mapping. + DenseMap specConstCompositeMap; + + /// Result to info needed to materialize an OpSpecConstantOperation + /// mapping. + DenseMap + specConstOperationMap; + + // Result to variable mapping. + DenseMap globalVariableMap; + + // Result to function mapping. + DenseMap funcMap; + + // Result to block mapping. + DenseMap blockMap; + + // Header block to its merge (and continue) target mapping. + BlockMergeInfoMap blockMergeInfo; + + // Block to its phi (block argument) mapping. + DenseMap blockPhiInfo; + + // Result to value mapping. + DenseMap valueMap; + + // Mapping from result to undef value of a type. + DenseMap undefMap; + + // Result to name mapping. + DenseMap nameMap; + + // Result to debug info mapping. + DenseMap debugInfoMap; + + // Result to decorations mapping. + DenseMap decorations; + + // Result to type decorations. + DenseMap typeDecorations; + + // Result to member decorations. + // decorated-struct-type- -> + // (struct-member-index -> (decoration -> decoration-operands)) + DenseMap>>> + memberDecorationMap; + + // Result to member name. + // struct-type- -> (struct-member-index -> name) + DenseMap> memberNameMap; + + // Result to extended instruction set name. + DenseMap extendedInstSets; + + // List of instructions that are processed in a deferred fashion (after an + // initial processing of the entire binary). Some operations like + // OpEntryPoint, and OpExecutionMode use forward references to function + // s. In SPIR-V dialect the corresponding operations (spv.EntryPoint and + // spv.ExecutionMode) need these references resolved. So these instructions + // are deserialized and stored for processing once the entire binary is + // processed. + SmallVector>, 4> + deferredInstructions; + + /// A list of IDs for all types forward-declared through OpTypeForwardPointer + /// instructions. 
+ llvm::SetVector typeForwardPointerIDs; + + /// A list of all structs which have unresolved member types. + SmallVector deferredStructTypesInfos; +}; + +} // namespace spirv +} // namespace mlir + +#endif // MLIR_TARGET_SPIRV_DESERIALIZER_H diff --git a/mlir/lib/Target/SPIRV/Serialization/CMakeLists.txt b/mlir/lib/Target/SPIRV/Serialization/CMakeLists.txt new file mode 100644 index 00000000000000..c4120960a22bf3 --- /dev/null +++ b/mlir/lib/Target/SPIRV/Serialization/CMakeLists.txt @@ -0,0 +1,15 @@ +add_mlir_translation_library(MLIRSPIRVSerialization + Serialization.cpp + + DEPENDS + MLIRSPIRVSerializationGen + + LINK_LIBS PUBLIC + MLIRIR + MLIRSPIRV + MLIRSPIRVBinaryUtils + MLIRSupport + MLIRTranslation + ) + + diff --git a/mlir/lib/Target/SPIRV/Serialization.cpp b/mlir/lib/Target/SPIRV/Serialization/Serialization.cpp similarity index 100% rename from mlir/lib/Target/SPIRV/Serialization.cpp rename to mlir/lib/Target/SPIRV/Serialization/Serialization.cpp diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp index 74fe1e0fdb08d5..20bf8773b137bb 100644 --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -996,11 +996,10 @@ static void emitDeserializationFunction(const Record *attrClass, /// based on the `opcode`. 
static void initDispatchDeserializationFn(StringRef opcode, StringRef words, raw_ostream &os) { - os << formatv( - "LogicalResult " - "Deserializer::dispatchToAutogenDeserialization(spirv::Opcode {0}, " - "ArrayRef {1}) {{\n", - opcode, words); + os << formatv("LogicalResult spirv::Deserializer::" + "dispatchToAutogenDeserialization(spirv::Opcode {0}," + " ArrayRef {1}) {{\n", + opcode, words); os << formatv(" switch ({0}) {{\n", opcode); } @@ -1043,8 +1042,8 @@ static void initExtendedSetDeserializationDispatch(StringRef extensionSetName, StringRef instructionID, StringRef words, raw_ostream &os) { - os << formatv("LogicalResult " - "Deserializer::dispatchToExtensionSetAutogenDeserialization(" + os << formatv("LogicalResult spirv::Deserializer::" + "dispatchToExtensionSetAutogenDeserialization(" "StringRef {0}, uint32_t {1}, ArrayRef {2}) {{\n", extensionSetName, instructionID, words); } From 93b54b7c6733fcb11fd6536499e73872d7452ffb Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Tue, 12 Jan 2021 15:56:58 +0000 Subject: [PATCH 36/86] [PowerPC][NFCI] PassSubtarget to ASMWriter Subtarget feature bits are needed to change instprinter's behavior based on feature bits. Most of the other popular targets were updated back in 2015, in https://reviews.llvm.org/rGb46d0234a6969 we should update it too. 
Reviewed By: sfertile Differential Revision: https://reviews.llvm.org/D94449 --- .../PowerPC/MCTargetDesc/PPCInstPrinter.cpp | 94 ++++++++++++------- .../PowerPC/MCTargetDesc/PPCInstPrinter.h | 85 +++++++++++------ llvm/lib/Target/PowerPC/PPC.td | 8 ++ 3 files changed, 122 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index 2eff1d94ce2048..a291a34d4c52db 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -71,11 +71,11 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, "reference expression if it is an expression at all."); O << "\taddis "; - printOperand(MI, 0, O); + printOperand(MI, 0, STI, O); O << ", "; - printOperand(MI, 2, O); + printOperand(MI, 2, STI, O); O << "("; - printOperand(MI, 1, O); + printOperand(MI, 1, STI, O); O << ")"; return; } @@ -94,7 +94,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT) { const MCSymbol &Symbol = SymExpr->getSymbol(); if (MI->getOpcode() == PPC::PLDpc) { - printInstruction(MI, Address, O); + printInstruction(MI, Address, STI, O); O << "\n"; Symbol.print(O, &MAI); O << ":"; @@ -124,9 +124,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, SH = 32-SH; } if (useSubstituteMnemonic) { - printOperand(MI, 0, O); + printOperand(MI, 0, STI, O); O << ", "; - printOperand(MI, 1, O); + printOperand(MI, 1, STI, O); O << ", " << (unsigned int)SH; printAnnotation(O, Annot); @@ -141,9 +141,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH if (63-SH == ME) { O << "\tsldi "; - printOperand(MI, 0, O); + printOperand(MI, 0, STI, O); O << ", "; - printOperand(MI, 1, O); + printOperand(MI, 1, STI, O); O << ", " << (unsigned int)SH; printAnnotation(O, 
Annot); return; @@ -171,9 +171,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (IsBookE && TH != 0 && TH != 16) O << (unsigned int) TH << ", "; - printOperand(MI, 1, O); + printOperand(MI, 1, STI, O); O << ", "; - printOperand(MI, 2, O); + printOperand(MI, 2, STI, O); if (!IsBookE && TH != 0 && TH != 16) O << ", " << (unsigned int) TH; @@ -198,21 +198,22 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, O << "stps"; O << " "; - printOperand(MI, 1, O); + printOperand(MI, 1, STI, O); O << ", "; - printOperand(MI, 2, O); + printOperand(MI, 2, STI, O); printAnnotation(O, Annot); return; } } - if (!printAliasInstr(MI, Address, O)) - printInstruction(MI, Address, O); + if (!printAliasInstr(MI, Address, STI, O)) + printInstruction(MI, Address, STI, O); printAnnotation(O, Annot); } void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier) { unsigned Code = MI->getOperand(OpNo).getImm(); @@ -306,10 +307,11 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, assert(StringRef(Modifier) == "reg" && "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!"); - printOperand(MI, OpNo+1, O); + printOperand(MI, OpNo + 1, STI, O); } void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Code = MI->getOperand(OpNo).getImm(); if (Code == 2) @@ -319,6 +321,7 @@ void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 1 && "Invalid u1imm argument!"); @@ -326,6 +329,7 @@ void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, 
raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 3 && "Invalid u2imm argument!"); @@ -333,6 +337,7 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 8 && "Invalid u3imm argument!"); @@ -340,6 +345,7 @@ void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 15 && "Invalid u4imm argument!"); @@ -347,6 +353,7 @@ void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { int Value = MI->getOperand(OpNo).getImm(); Value = SignExtend32<5>(Value); @@ -354,6 +361,7 @@ void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value == 0 && "Operand must be zero"); @@ -361,6 +369,7 @@ void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 31 && "Invalid u5imm argument!"); @@ -368,6 +377,7 @@ void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 63 && "Invalid u6imm argument!"); @@ -375,6 
+385,7 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 127 && "Invalid u7imm argument!"); @@ -385,12 +396,14 @@ void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo, // of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and // print as unsigned. void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned char Value = MI->getOperand(OpNo).getImm(); O << (unsigned int)Value; } void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned short Value = MI->getOperand(OpNo).getImm(); assert(Value <= 1023 && "Invalid u10imm argument!"); @@ -398,6 +411,7 @@ void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned short Value = MI->getOperand(OpNo).getImm(); assert(Value <= 4095 && "Invalid u12imm argument!"); @@ -405,14 +419,16 @@ void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) O << (short)MI->getOperand(OpNo).getImm(); else - printOperand(MI, OpNo, O); + printOperand(MI, OpNo, STI, O); } void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) { long long Value = MI->getOperand(OpNo).getImm(); @@ -420,21 +436,24 @@ void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo, O << (long long)Value; } else - printOperand(MI, OpNo, O); + printOperand(MI, OpNo, 
STI, O); } void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) O << (unsigned short)MI->getOperand(OpNo).getImm(); else - printOperand(MI, OpNo, O); + printOperand(MI, OpNo, STI, O); } void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address, - unsigned OpNo, raw_ostream &O) { + unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { if (!MI->getOperand(OpNo).isImm()) - return printOperand(MI, OpNo, O); + return printOperand(MI, OpNo, STI, O); int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); if (PrintBranchImmAsAddress) { uint64_t Target = Address + Imm; @@ -457,16 +476,16 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address, } void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { if (!MI->getOperand(OpNo).isImm()) - return printOperand(MI, OpNo, O); + return printOperand(MI, OpNo, STI, O); O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); } - void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned CCReg = MI->getOperand(OpNo).getReg(); unsigned RegNo; switch (CCReg) { @@ -484,33 +503,37 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { - printS16ImmOperand(MI, OpNo, O); + printS16ImmOperand(MI, OpNo, STI, O); O << '('; if (MI->getOperand(OpNo+1).getReg() == PPC::R0) O << "0"; else - printOperand(MI, OpNo+1, O); + printOperand(MI, OpNo + 1, STI, O); O << ')'; } void PPCInstPrinter::printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { - printS34ImmOperand(MI, OpNo, O); + printS34ImmOperand(MI, OpNo, STI, O); O << '('; - 
printImmZeroOperand(MI, OpNo + 1, O); + printImmZeroOperand(MI, OpNo + 1, STI, O); O << ')'; } void PPCInstPrinter::printMemRegImm34(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printS34ImmOperand(MI, OpNo, O); + const MCSubtargetInfo &STI, + raw_ostream &O) { + printS34ImmOperand(MI, OpNo, STI, O); O << '('; - printOperand(MI, OpNo + 1, O); + printOperand(MI, OpNo + 1, STI, O); O << ')'; } void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { // When used as the base register, r0 reads constant zero rather than // the value contained in the register. For this reason, the darwin @@ -518,13 +541,13 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, if (MI->getOperand(OpNo).getReg() == PPC::R0) O << "0"; else - printOperand(MI, OpNo, O); + printOperand(MI, OpNo, STI, O); O << ", "; - printOperand(MI, OpNo+1, O); + printOperand(MI, OpNo + 1, STI, O); } void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must // come at the _end_ of the expression. 
const MCOperand &Op = MI->getOperand(OpNo); @@ -544,7 +567,7 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, if (RefExp->getKind() == MCSymbolRefExpr::VK_PPC_NOTOC) O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind()); O << '('; - printOperand(MI, OpNo+1, O); + printOperand(MI, OpNo + 1, STI, O); O << ')'; if (RefExp->getKind() != MCSymbolRefExpr::VK_None && RefExp->getKind() != MCSymbolRefExpr::VK_PPC_NOTOC) @@ -602,7 +625,7 @@ bool PPCInstPrinter::showRegistersWithPrefix() const { } void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { unsigned Reg = Op.getReg(); @@ -631,4 +654,3 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, assert(Op.isExpr() && "unknown operand kind in printOperand"); Op.getExpr()->print(O, &MAI); } - diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h index 2b5414458ce6b4..5e9b01494416ab 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h @@ -37,45 +37,72 @@ class PPCInstPrinter : public MCInstPrinter { // Autogenerated by tblgen. 
std::pair getMnemonic(const MCInst *MI) override; - void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); + void printInstruction(const MCInst *MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS); + bool printAliasInstr(const MCInst *MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &OS); void printCustomAliasOperand(const MCInst *MI, uint64_t Address, unsigned OpIdx, unsigned PrintMethodIdx, - raw_ostream &OS); + const MCSubtargetInfo &STI, raw_ostream &OS); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier = nullptr); - void printATBitsAsHint(const MCInst *MI, unsigned OpNo, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O, + const char *Modifier = nullptr); + void printATBitsAsHint(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); - void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU7ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU12ImmOperand(const MCInst *MI, unsigned OpNo, 
raw_ostream &O); - void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printS34ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printImmZeroOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU1ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU2ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU3ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU4ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printS5ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU5ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU6ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU7ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU10ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU12ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printS34ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printImmZeroOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo, - raw_ostream &O); - 
void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printTLSCall(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); - void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printcrbitm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); - void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemRegImm34(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemRegImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMemRegImm34(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMemRegReg(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); }; } // end namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 06403f5e55a20b..f60adecf65ee53 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -598,6 +598,13 @@ def PPCInstrInfo : InstrInfo { let noNamedPositionallyEncodedOperands = 1; } +def PPCAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + def PPCAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; } @@ -616,6 +623,7 @@ def PPC : Target { // Information about the instructions. 
let InstructionSet = PPCInstrInfo; + let AssemblyWriters = [PPCAsmWriter]; let AssemblyParsers = [PPCAsmParser]; let AssemblyParserVariants = [PPCAsmParserVariant]; let AllowRegisterRenaming = 1; From 67a339e96839cdecb5efad0e2731ab20a4ee458e Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 6 Jan 2021 16:32:59 -0800 Subject: [PATCH 37/86] [MLIR] Disallow `sym_visibility`, `sym_name` and `type` attributes in the parsed attribute dictionary. Differential Revision: https://reviews.llvm.org/D94200 --- mlir/lib/IR/FunctionImplementation.cpp | 19 +++++++++++++++++-- mlir/test/Dialect/Tosa/inlining.mlir | 8 ++++---- mlir/test/IR/core-ops.mlir | 3 --- mlir/test/IR/invalid-func-op.mlir | 16 ++++++++++++++++ 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/mlir/lib/IR/FunctionImplementation.cpp b/mlir/lib/IR/FunctionImplementation.cpp index 90ea91d49fb6ea..4bec1684b5eeb9 100644 --- a/mlir/lib/IR/FunctionImplementation.cpp +++ b/mlir/lib/IR/FunctionImplementation.cpp @@ -180,7 +180,7 @@ mlir::impl::parseFunctionLikeOp(OpAsmParser &parser, OperationState &result, return failure(); // Parse the function signature. - auto signatureLocation = parser.getCurrentLocation(); + llvm::SMLoc signatureLocation = parser.getCurrentLocation(); bool isVariadic = false; if (parseFunctionSignature(parser, allowVariadic, entryArgs, argTypes, argAttrs, isVariadic, resultTypes, resultAttrs)) @@ -196,9 +196,24 @@ mlir::impl::parseFunctionLikeOp(OpAsmParser &parser, OperationState &result, << (errorMessage.empty() ? "" : ": ") << errorMessage; // If function attributes are present, parse them. - if (parser.parseOptionalAttrDictWithKeyword(result.attributes)) + NamedAttrList parsedAttributes; + llvm::SMLoc attributeDictLocation = parser.getCurrentLocation(); + if (parser.parseOptionalAttrDictWithKeyword(parsedAttributes)) return failure(); + // Disallow attributes that are inferred from elsewhere in the attribute + // dictionary. 
+ for (StringRef disallowed : + {SymbolTable::getVisibilityAttrName(), SymbolTable::getSymbolAttrName(), + getTypeAttrName()}) { + if (parsedAttributes.get(disallowed)) + return parser.emitError(attributeDictLocation, "'") + << disallowed + << "' is an inferred attribute and should not be specified in the " + "explicit attribute dictionary"; + } + result.attributes.append(parsedAttributes); + // Add the attributes to the function arguments. assert(argAttrs.size() == argTypes.size()); assert(resultAttrs.size() == resultTypes.size()); diff --git a/mlir/test/Dialect/Tosa/inlining.mlir b/mlir/test/Dialect/Tosa/inlining.mlir index 363358b0781b99..f6789fafe3ed63 100644 --- a/mlir/test/Dialect/Tosa/inlining.mlir +++ b/mlir/test/Dialect/Tosa/inlining.mlir @@ -19,11 +19,11 @@ func @inlined_if_fn(%arg0: tensor, %arg1: tensor, %arg2: tensor) - }) : (tensor, tensor, tensor) -> tensor return %0 : tensor } -func @add(%arg0: tensor, %arg1: tensor) -> tensor attributes {sym_visibility = "private"} { +func private @add(%arg0: tensor, %arg1: tensor) -> tensor { %0 = "tosa.add"(%arg0, %arg1) : (tensor, tensor) -> tensor return %0 : tensor } -func @sub(%arg0: tensor, %arg1: tensor) -> tensor attributes {sym_visibility = "private"} { +func private @sub(%arg0: tensor, %arg1: tensor) -> tensor { %0 = "tosa.sub"(%arg0, %arg1) : (tensor, tensor) -> tensor return %0 : tensor } @@ -45,12 +45,12 @@ func @inlined_while_fn(%arg0: tensor, %arg1: tensor, %arg2: tensor, tensor, tensor, tensor<10xi32>) -> (tensor, tensor, tensor, tensor<10xi32>) return %1#3 : tensor<10xi32> } -func @while_body_50(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<10xi32>) -> (tensor, tensor, tensor, tensor<10xi32>) attributes {sym_visibility = "private"} { +func private @while_body_50(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<10xi32>) -> (tensor, tensor, tensor, tensor<10xi32>) { %1 = "tosa.add"(%arg0, %arg1) : (tensor, tensor) -> tensor %2 = "tosa.add"(%arg3, %1) : (tensor<10xi32>, tensor) 
-> tensor<10xi32> return %1, %arg1, %arg2, %2: tensor, tensor, tensor, tensor<10xi32> } -func @while_cond_40(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<10xi32>) -> tensor attributes {sym_visibility = "private"} { +func private @while_cond_40(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<10xi32>) -> tensor { %0 = "tosa.greater_equal"(%arg0, %arg1) : (tensor, tensor) -> tensor %1 = "tosa.logical_not"(%0) : (tensor) -> tensor return %1 : tensor diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index fc712d4939ba82..396211c10430a5 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -942,6 +942,3 @@ func @subtensor_insert(%t: tensor<8x16x4xf32>, %t2: tensor<16x32x8xf32>, %idx : return } - -// CHECK-LABEL: func private @legacy_visibility_syntax -func @legacy_visibility_syntax() attributes { sym_visibility = "private" } diff --git a/mlir/test/IR/invalid-func-op.mlir b/mlir/test/IR/invalid-func-op.mlir index afa73699807683..c2ceefeb3ee76c 100644 --- a/mlir/test/IR/invalid-func-op.mlir +++ b/mlir/test/IR/invalid-func-op.mlir @@ -78,3 +78,19 @@ func @f(%arg0: i64) -> (i64 {test.invalid_attr}) { // expected-error@+1 {{symbol declaration cannot have public visibility}} func @invalid_public_declaration() + +// ----- + +// expected-error@+1 {{'sym_visibility' is an inferred attribute and should not be specified in the explicit attribute dictionary}} +func @legacy_visibility_syntax() attributes { sym_visibility = "private" } + +// ----- + +// expected-error@+1 {{'sym_name' is an inferred attribute and should not be specified in the explicit attribute dictionary}} +func private @invalid_symbol_name_attr() attributes { sym_name = "x" } + +// ----- + +// expected-error@+1 {{'type' is an inferred attribute and should not be specified in the explicit attribute dictionary}} +func private @invalid_symbol_type_attr() attributes { type = "x" } + From 85aaa3e310c23ec8a375b7a2e2fceee5a72441ef Mon Sep 17 00:00:00 2001 
From: Simon Pilgrim Date: Tue, 12 Jan 2021 17:24:34 +0000 Subject: [PATCH 38/86] [X86] Regenerate sdiv_fix_sat.ll + udiv_fix_sat.ll tests Adding missing libcall PLT qualifiers --- llvm/test/CodeGen/X86/sdiv_fix_sat.ll | 20 ++++++++++---------- llvm/test/CodeGen/X86/udiv_fix_sat.ll | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll index 512488e8f8725d..617d5d7876bd80 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -322,7 +322,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X64-NEXT: movq %r15, %rdi ; X64-NEXT: movq %r12, %rsi ; X64-NEXT: movq %r13, %rcx -; X64-NEXT: callq __divti3 +; X64-NEXT: callq __divti3@PLT ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rbp @@ -338,7 +338,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X64-NEXT: movq %r12, %rsi ; X64-NEXT: movq (%rsp), %rdx # 8-byte Reload ; X64-NEXT: movq %r13, %rcx -; X64-NEXT: callq __modti3 +; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al ; X64-NEXT: testb %r14b, %al @@ -613,7 +613,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %r12, %rdi ; X64-NEXT: movq %rbp, %rsi ; X64-NEXT: movq %r15, %rcx -; X64-NEXT: callq __divti3 +; X64-NEXT: callq __divti3@PLT ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 @@ -626,7 +626,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rbp, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %r15, %rcx -; X64-NEXT: callq __modti3 +; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al @@ -668,7 +668,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %r15, %rdi ; X64-NEXT: 
movq %r13, %rsi ; X64-NEXT: movq %rbp, %rcx -; X64-NEXT: callq __divti3 +; X64-NEXT: callq __divti3@PLT ; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 @@ -681,7 +681,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %rbp, %rcx -; X64-NEXT: callq __modti3 +; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al @@ -735,7 +735,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %r15, %rdi ; X64-NEXT: movq %r12, %rsi ; X64-NEXT: movq %rbp, %rcx -; X64-NEXT: callq __divti3 +; X64-NEXT: callq __divti3@PLT ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 @@ -748,7 +748,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %r12, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %rbp, %rcx -; X64-NEXT: callq __modti3 +; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al @@ -790,7 +790,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %r15, %rdi ; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq %rbp, %rcx -; X64-NEXT: callq __divti3 +; X64-NEXT: callq __divti3@PLT ; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 @@ -803,7 +803,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %rbp, %rcx -; X64-NEXT: callq __modti3 +; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll 
b/llvm/test/CodeGen/X86/udiv_fix_sat.ll index d2e3b80c214527..2be51c3ccbbaaf 100644 --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -179,7 +179,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X64-NEXT: shlq $32, %rdi ; X64-NEXT: xorl %ebx, %ebx ; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: callq __udivti3 +; X64-NEXT: callq __udivti3@PLT ; X64-NEXT: cmpq $-1, %rax ; X64-NEXT: movq $-1, %rcx ; X64-NEXT: cmovbq %rax, %rcx From dd955771240289fbcba5fa1312cb8c78f20cd78f Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 12 Jan 2021 09:56:06 -0800 Subject: [PATCH 39/86] Fix typo in diagnostic message rdar://66684531 --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 19b003398b025d..717bf6e12ccdd5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3263,7 +3263,7 @@ def warn_attribute_dllexport_explicit_instantiation_def : Warning< "'dllexport' attribute ignored on explicit instantiation definition">, InGroup; def warn_invalid_initializer_from_system_header : Warning< - "invalid constructor form class in system header, should not be explicit">, + "invalid constructor from class in system header, should not be explicit">, InGroup>; def note_used_in_initialization_here : Note<"used in initialization here">; def err_attribute_dll_member_of_dll_class : Error< From a4931d4fe38d6feef53f97f3dcc7792bfcb06c84 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 12 Jan 2021 18:01:41 +0000 Subject: [PATCH 40/86] [AMDGPU] Regenerate umax crash test --- .../CodeGen/AMDGPU/r600-legalize-umax-bug.ll | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll index 
b7ed34bbf09b77..b4cd36daad659e 100644 --- a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll @@ -1,8 +1,27 @@ -; RUN: llc -march=r600 -mcpu=cypress -start-after safe-stack %s -o - | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=r600 -mcpu=cypress -start-after safe-stack | FileCheck %s ; Don't crash -; CHECK: MAX_UINT define amdgpu_kernel void @test(i64 addrspace(1)* %out) { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: ALU 4, @6, KC0[CB0:0-32], KC1[] +; CHECK-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 0 +; CHECK-NEXT: ALU 3, @11, KC0[], KC1[] +; CHECK-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; CHECK-NEXT: CF_END +; CHECK-NEXT: PAD +; CHECK-NEXT: ALU clause starting at 6: +; CHECK-NEXT: MOV T0.X, literal.x, +; CHECK-NEXT: MOV T0.Y, 0.0, +; CHECK-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; CHECK-NEXT: MOV * T0.W, KC0[2].Y, +; CHECK-NEXT: ALU clause starting at 11: +; CHECK-NEXT: MAX_UINT T0.X, T0.X, literal.x, +; CHECK-NEXT: MOV T0.Y, 0.0, +; CHECK-NEXT: LSHR * T1.X, T0.W, literal.y, +; CHECK-NEXT: 4(5.605194e-45), 2(2.802597e-45) bb: store i64 2, i64 addrspace(1)* %out %tmp = load i64, i64 addrspace(1)* %out From 3d9c51d111d0c8480d10fc48fb621bac1d080449 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Tue, 12 Jan 2021 17:48:52 +0000 Subject: [PATCH 41/86] [SVE][NFC] Regenerate a few CodeGen tests Regenerated using llvm/utils/update_llc_test_checks.py as part of D94504, committing separately to reduce the diff for D94504. 
--- .../AArch64/sve-intrinsics-loads-nf.ll | 229 +++++++++++------- ...contiguous-ldst-addressing-mode-reg-imm.ll | 227 +++++++++-------- ...n-temporal-ldst-addressing-mode-reg-imm.ll | 74 +++--- 3 files changed, 311 insertions(+), 219 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll index be1c03a754fee3..44d4b1d2756041 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t @@ -10,18 +11,20 @@ define @ldnf1b( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b: -; CHECK: ldnf1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a) ret %load } define @ldnf1b_out_of_lower_bound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_out_of_lower_bound: -; CHECK: rdvl x[[OFFSET:[0-9]+]], #-9 -; CHECK-NEXT: add x[[BASE:[0-9]+]], x0, x[[OFFSET]] -; CHECK-NEXT: ldnf1b { z0.b }, p0/z, [x[[BASE]]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x8, #-9 +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: ldnf1b { z0.b }, p0/z, [x8] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 -9 %base_scalar = bitcast * %base to i8* @@ -31,8 +34,9 @@ define @ldnf1b_out_of_lower_bound( %pg, i8* define @ldnf1b_lower_bound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_lower_bound: -; CHECK: ldnf1b { z0.b }, p0/z, [x0, #-8, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.b }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 -8 %base_scalar = bitcast * %base to i8* @@ -42,8 
+46,9 @@ define @ldnf1b_lower_bound( %pg, i8* %a) { define @ldnf1b_inbound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_inbound: -; CHECK: ldnf1b { z0.b }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.b }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 1 %base_scalar = bitcast * %base to i8* @@ -53,8 +58,9 @@ define @ldnf1b_inbound( %pg, i8* %a) { define @ldnf1b_upper_bound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_upper_bound: -; CHECK: ldnf1b { z0.b }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.b }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i8* @@ -64,10 +70,11 @@ define @ldnf1b_upper_bound( %pg, i8* %a) { define @ldnf1b_out_of_upper_bound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_out_of_upper_bound: -; CHECK: rdvl x[[OFFSET:[0-9]+]], #8 -; CHECK-NEXT: add x[[BASE:[0-9]+]], x0, x[[OFFSET]] -; CHECK-NEXT: ldnf1b { z0.b }, p0/z, [x[[BASE]]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x8, #8 +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: ldnf1b { z0.b }, p0/z, [x8] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 8 %base_scalar = bitcast * %base to i8* @@ -77,8 +84,9 @@ define @ldnf1b_out_of_upper_bound( %pg, i8* define @ldnf1b_h( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_h: -; CHECK: ldnf1b { z0.h }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) %res = zext %load to ret %res @@ -86,8 +94,9 @@ define @ldnf1b_h( %pg, i8* %a) { define @ldnf1b_h_inbound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_h_inbound: -; CHECK: ldnf1b { z0.h }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.h }, 
p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i8* @@ -98,8 +107,9 @@ define @ldnf1b_h_inbound( %pg, i8* %a) { define @ldnf1sb_h( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_h: -; CHECK: ldnf1sb { z0.h }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sb { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) %res = sext %load to ret %res @@ -107,8 +117,9 @@ define @ldnf1sb_h( %pg, i8* %a) { define @ldnf1sb_h_inbound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_h_inbound: -; CHECK: ldnf1sb { z0.h }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sb { z0.h }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i8* @@ -119,16 +130,18 @@ define @ldnf1sb_h_inbound( %pg, i8* %a) { define @ldnf1h( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h: -; CHECK: ldnf1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a) ret %load } define @ldnf1h_inbound( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h_inbound: -; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i16* %a to * %base = getelementptr , * %base_scalable, i64 1 %base_scalar = bitcast * %base to i16* @@ -138,24 +151,27 @@ define @ldnf1h_inbound( %pg, i16* %a) { define @ldnf1h_f16( %pg, half* %a) { ; CHECK-LABEL: ldnf1h_f16: -; CHECK: ldnf1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a) ret %load } define @ldnf1h_bf16( %pg, bfloat* %a) #0 
{ ; CHECK-LABEL: ldnf1h_bf16: -; CHECK: ldnf1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv8bf16( %pg, bfloat* %a) ret %load } define @ldnf1h_f16_inbound( %pg, half* %a) { ; CHECK-LABEL: ldnf1h_f16_inbound: -; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast half* %a to * %base = getelementptr , * %base_scalable, i64 1 %base_scalar = bitcast * %base to half* @@ -165,8 +181,9 @@ define @ldnf1h_f16_inbound( %pg, half* %a) define @ldnf1h_bf16_inbound( %pg, bfloat* %a) #0 { ; CHECK-LABEL: ldnf1h_bf16_inbound: -; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast bfloat* %a to * %base = getelementptr , * %base_scalable, i64 1 %base_scalar = bitcast * %base to bfloat* @@ -176,8 +193,9 @@ define @ldnf1h_bf16_inbound( %pg, bfloat* define @ldnf1b_s( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_s: -; CHECK: ldnf1b { z0.s }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) %res = zext %load to ret %res @@ -185,8 +203,9 @@ define @ldnf1b_s( %pg, i8* %a) { define @ldnf1b_s_inbound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_s_inbound: -; CHECK: ldnf1b { z0.s }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i8* @@ -197,8 +216,9 @@ define @ldnf1b_s_inbound( %pg, i8* %a) { define @ldnf1sb_s( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_s: -; CHECK: ldnf1sb { z0.s }, p0/z, [x0] -; CHECK-NEXT: ret 
+; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sb { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) %res = sext %load to ret %res @@ -206,8 +226,9 @@ define @ldnf1sb_s( %pg, i8* %a) { define @ldnf1sb_s_inbound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_s_inbound: -; CHECK: ldnf1sb { z0.s }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sb { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i8* @@ -218,8 +239,9 @@ define @ldnf1sb_s_inbound( %pg, i8* %a) { define @ldnf1h_s( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h_s: -; CHECK: ldnf1h { z0.s }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) %res = zext %load to ret %res @@ -227,8 +249,9 @@ define @ldnf1h_s( %pg, i16* %a) { define @ldnf1h_s_inbound( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h_s_inbound: -; CHECK: ldnf1h { z0.s }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i16* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i16* @@ -239,8 +262,9 @@ define @ldnf1h_s_inbound( %pg, i16* %a) { define @ldnf1sh_s( %pg, i16* %a) { ; CHECK-LABEL: ldnf1sh_s: -; CHECK: ldnf1sh { z0.s }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) %res = sext %load to ret %res @@ -248,8 +272,9 @@ define @ldnf1sh_s( %pg, i16* %a) { define @ldnf1sh_s_inbound( %pg, i16* %a) { ; CHECK-LABEL: ldnf1sh_s_inbound: -; CHECK: ldnf1sh { z0.s }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sh { z0.s }, p0/z, [x0, #7, mul vl] +; 
CHECK-NEXT: ret %base_scalable = bitcast i16* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i16* @@ -260,16 +285,18 @@ define @ldnf1sh_s_inbound( %pg, i16* %a) { define @ldnf1w( %pg, i32* %a) { ; CHECK-LABEL: ldnf1w: -; CHECK: ldnf1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a) ret %load } define @ldnf1w_inbound( %pg, i32* %a) { ; CHECK-LABEL: ldnf1w_inbound: -; CHECK: ldnf1w { z0.s }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1w { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i32* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i32* @@ -279,16 +306,18 @@ define @ldnf1w_inbound( %pg, i32* %a) { define @ldnf1w_f32( %pg, float* %a) { ; CHECK-LABEL: ldnf1w_f32: -; CHECK: ldnf1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a) ret %load } define @ldnf1w_f32_inbound( %pg, float* %a) { ; CHECK-LABEL: ldnf1w_f32_inbound: -; CHECK: ldnf1w { z0.s }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1w { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast float* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to float* @@ -298,8 +327,9 @@ define @ldnf1w_f32_inbound( %pg, float* %a define @ldnf1b_d( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_d: -; CHECK: ldnf1b { z0.d }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) %res = zext %load to ret %res @@ -307,8 +337,9 @@ define @ldnf1b_d( %pg, i8* %a) { define @ldnf1b_d_inbound( %pg, i8* %a) { ; 
CHECK-LABEL: ldnf1b_d_inbound: -; CHECK: ldnf1b { z0.d }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1b { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i8* @@ -319,8 +350,9 @@ define @ldnf1b_d_inbound( %pg, i8* %a) { define @ldnf1sb_d( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_d: -; CHECK: ldnf1sb { z0.d }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) %res = sext %load to ret %res @@ -328,8 +360,9 @@ define @ldnf1sb_d( %pg, i8* %a) { define @ldnf1sb_d_inbound( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_d_inbound: -; CHECK: ldnf1sb { z0.d }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sb { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i8* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i8* @@ -340,8 +373,9 @@ define @ldnf1sb_d_inbound( %pg, i8* %a) { define @ldnf1h_d( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h_d: -; CHECK: ldnf1h { z0.d }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) %res = zext %load to ret %res @@ -349,8 +383,9 @@ define @ldnf1h_d( %pg, i16* %a) { define @ldnf1h_d_inbound( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h_d_inbound: -; CHECK: ldnf1h { z0.d }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1h { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i16* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i16* @@ -361,8 +396,9 @@ define @ldnf1h_d_inbound( %pg, i16* %a) { define @ldnf1sh_d( %pg, i16* %a) { ; CHECK-LABEL: ldnf1sh_d: -; CHECK: ldnf1sh { z0.d 
}, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) %res = sext %load to ret %res @@ -370,8 +406,9 @@ define @ldnf1sh_d( %pg, i16* %a) { define @ldnf1sh_d_inbound( %pg, i16* %a) { ; CHECK-LABEL: ldnf1sh_d_inbound: -; CHECK: ldnf1sh { z0.d }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sh { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i16* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i16* @@ -382,8 +419,9 @@ define @ldnf1sh_d_inbound( %pg, i16* %a) { define @ldnf1w_d( %pg, i32* %a) { ; CHECK-LABEL: ldnf1w_d: -; CHECK: ldnf1w { z0.d }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) %res = zext %load to ret %res @@ -391,8 +429,9 @@ define @ldnf1w_d( %pg, i32* %a) { define @ldnf1w_d_inbound( %pg, i32* %a) { ; CHECK-LABEL: ldnf1w_d_inbound: -; CHECK: ldnf1w { z0.d }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1w { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i32* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i32* @@ -403,8 +442,9 @@ define @ldnf1w_d_inbound( %pg, i32* %a) { define @ldnf1sw_d( %pg, i32* %a) { ; CHECK-LABEL: ldnf1sw_d: -; CHECK: ldnf1sw { z0.d }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) %res = sext %load to ret %res @@ -412,8 +452,9 @@ define @ldnf1sw_d( %pg, i32* %a) { define @ldnf1sw_d_inbound( %pg, i32* %a) { ; CHECK-LABEL: ldnf1sw_d_inbound: -; CHECK: ldnf1sw { z0.d }, p0/z, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1sw { 
z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i32* %a to * %base = getelementptr , * %base_scalable, i64 7 %base_scalar = bitcast * %base to i32* @@ -424,16 +465,18 @@ define @ldnf1sw_d_inbound( %pg, i32* %a) { define @ldnf1d( %pg, i64* %a) { ; CHECK-LABEL: ldnf1d: -; CHECK: ldnf1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a) ret %load } define @ldnf1d_inbound( %pg, i64* %a) { ; CHECK-LABEL: ldnf1d_inbound: -; CHECK: ldnf1d { z0.d }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1d { z0.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast i64* %a to * %base = getelementptr , * %base_scalable, i64 1 %base_scalar = bitcast * %base to i64* @@ -443,16 +486,18 @@ define @ldnf1d_inbound( %pg, i64* %a) { define @ldnf1d_f64( %pg, double* %a) { ; CHECK-LABEL: ldnf1d_f64: -; CHECK: ldnf1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret %load = call @llvm.aarch64.sve.ldnf1.nxv2f64( %pg, double* %a) ret %load } define @ldnf1d_f64_inbound( %pg, double* %a) { ; CHECK-LABEL: ldnf1d_f64_inbound: -; CHECK: ldnf1d { z0.d }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnf1d { z0.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret %base_scalable = bitcast double* %a to * %base = getelementptr , * %base_scalable, i64 1 %base_scalar = bitcast * %base to double* diff --git a/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll index 6065dbdd2765e6..ed0c9f278f0ab1 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll @@ -1,4 +1,5 @@ -; RUN: llc 
-mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. @@ -11,13 +12,14 @@ define void @imm_out_of_range( * %base, %mask) nounwind { ; CHECK-LABEL: imm_out_of_range: -; CHECK-NEXT: rdvl x8, #8 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: ld1d { z[[DATA:[0-9]+]].d }, p0/z, [x{{[0-9]+}}] -; CHECK-NEXT: rdvl x8, #-9 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: st1d { z[[DATA]].d }, p0, [x{{[0-9]+}}] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x8, #8 +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] +; CHECK-NEXT: rdvl x8, #-9 +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 8 %data = call @llvm.masked.load.nxv2i64(* %base_load, i32 1, @@ -35,9 +37,10 @@ define void @imm_out_of_range( * %base, %mas define void @test_masked_ldst_sv2i8( * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2i8: -; CHECK-NEXT: ld1sb { z[[DATA:[0-9]+]].d }, p0/z, [x0, #-8, mul vl] -; CHECK-NEXT: st1b { z[[DATA]].d }, p0, [x0, #-7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: st1b { z0.d }, p0, [x0, #-7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -8 %data = call @llvm.masked.load.nxv2i8(* %base_load, i32 1, @@ -53,9 +56,10 @@ define void @test_masked_ldst_sv2i8( * %base, define void @test_masked_ldst_sv2i16( * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2i16: -; CHECK-NEXT: ld1sh { z[[DATA:[0-9]+]].d }, p0/z, [x0, #-8, mul vl] -; CHECK-NEXT: st1h { z[[DATA]].d }, p0, [x0, #-7, mul vl] -; CHECK-NEXT: ret +; CHECK: // 
%bb.0: +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: st1h { z0.d }, p0, [x0, #-7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -8 %data = call @llvm.masked.load.nxv2i16(* %base_load, i32 1, @@ -72,9 +76,10 @@ define void @test_masked_ldst_sv2i16( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2i32: -; CHECK-NEXT: ld1sw { z[[DATA:[0-9]+]].d }, p0/z, [x0, #-8, mul vl] -; CHECK-NEXT: st1w { z[[DATA]].d }, p0, [x0, #-7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: st1w { z0.d }, p0, [x0, #-7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -8 %data = call @llvm.masked.load.nxv2i32(* %base_load, i32 1, @@ -90,9 +95,10 @@ define void @test_masked_ldst_sv2i32( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2i64: -; CHECK-NEXT: ld1d { z[[DATA:[0-9]+]].d }, p0/z, [x0, #-8, mul vl] -; CHECK-NEXT: st1d { z[[DATA]].d }, p0, [x0, #-7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x0, #-7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -8 %data = call @llvm.masked.load.nxv2i64(* %base_load, i32 1, @@ -108,9 +114,10 @@ define void @test_masked_ldst_sv2i64( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2f16: -; CHECK-NEXT: ld1h { z[[DATA:[0-9]+]].d }, p0/z, [x0, #-8, mul vl] -; CHECK-NEXT: st1h { z[[DATA]].d }, p0, [x0, #-7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: st1h { z0.d }, p0, [x0, #-7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -8 %data = call @llvm.masked.load.nxv2f16(* %base_load, i32 1, @@ -127,9 +134,10 @@ define void @test_masked_ldst_sv2f16( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2f32: -; CHECK-NEXT: ld1w { 
z[[DATA:[0-9]+]].d }, p0/z, [x0, #-8, mul vl] -; CHECK-NEXT: st1w { z[[DATA]].d }, p0, [x0, #-7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: st1w { z0.d }, p0, [x0, #-7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -8 %data = call @llvm.masked.load.nxv2f32(* %base_load, i32 1, @@ -145,9 +153,10 @@ define void @test_masked_ldst_sv2f32( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2f64: -; CHECK-NEXT: ld1d { z[[DATA:[0-9]+]].d }, p0/z, [x0, #-6, mul vl] -; CHECK-NEXT: st1d { z[[DATA]].d }, p0, [x0, #-5, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #-6, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x0, #-5, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -6 %data = call @llvm.masked.load.nxv2f64(* %base_load, i32 1, @@ -165,8 +174,9 @@ define void @test_masked_ldst_sv2f64( * %base, @masked_zload_sv2i8_to_sv2i64(* %base, %mask) nounwind { ; CHECK-LABEL: masked_zload_sv2i8_to_sv2i64: -; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, #-4, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, #-4, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -4 %load = call @llvm.masked.load.nxv2i8(* %base_load, i32 1, @@ -178,8 +188,9 @@ define @masked_zload_sv2i8_to_sv2i64(* %base define @masked_sload_sv2i8_to_sv2i64(* %base, %mask) nounwind { ; CHECK-LABEL: masked_sload_sv2i8_to_sv2i64: -; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, #-3, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, #-3, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -3 %load = call @llvm.masked.load.nxv2i8(* %base_load, i32 1, @@ -191,8 +202,9 @@ define @masked_sload_sv2i8_to_sv2i64(* %base define @masked_zload_sv2i16_to_sv2i64(* %base, %mask) nounwind { ; CHECK-LABEL: masked_zload_sv2i16_to_sv2i64: -; CHECK-NEXT: ld1h { z0.d }, 
p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 1 %load = call @llvm.masked.load.nxv2i16(* %base_load, i32 1, @@ -204,8 +216,9 @@ define @masked_zload_sv2i16_to_sv2i64(* %ba define @masked_sload_sv2i16_to_sv2i64(* %base, %mask) nounwind { ; CHECK-LABEL: masked_sload_sv2i16_to_sv2i64: -; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 2 %load = call @llvm.masked.load.nxv2i16(* %base_load, i32 1, @@ -217,8 +230,9 @@ define @masked_sload_sv2i16_to_sv2i64(* %ba define @masked_zload_sv2i32_to_sv2i64(* %base, %mask) nounwind { ; CHECK-LABEL: masked_zload_sv2i32_to_sv2i64: -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, #-2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, #-2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -2 %load = call @llvm.masked.load.nxv2i32(* %base_load, i32 1, @@ -230,8 +244,9 @@ define @masked_zload_sv2i32_to_sv2i64(* %ba define @masked_sload_sv2i32_to_sv2i64(* %base, %mask) nounwind { ; CHECK-LABEL: masked_sload_sv2i32_to_sv2i64: -; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %load = call @llvm.masked.load.nxv2i32(* %base_load, i32 1, @@ -245,8 +260,9 @@ define @masked_sload_sv2i32_to_sv2i64(* %ba define void @masked_trunc_store_sv2i64_to_sv2i8( %val, *%base, %mask) nounwind { ; CHECK-LABEL: masked_trunc_store_sv2i64_to_sv2i8: -; CHECK-NEXT: st1b { z0.d }, p0, [x0, #3, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: st1b { z0.d }, p0, [x0, #3, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 3 %trunc = trunc %val to 
call void @llvm.masked.store.nxv2i8( %trunc, @@ -259,8 +275,9 @@ define void @masked_trunc_store_sv2i64_to_sv2i8( %val, %val, *%base, %mask) nounwind { ; CHECK-LABEL: masked_trunc_store_sv2i64_to_sv2i16: -; CHECK-NEXT: st1h { z0.d }, p0, [x0, #4, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: st1h { z0.d }, p0, [x0, #4, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 4 %trunc = trunc %val to call void @llvm.masked.store.nxv2i16( %trunc, @@ -272,8 +289,9 @@ define void @masked_trunc_store_sv2i64_to_sv2i16( %val, %val, *%base, %mask) nounwind { ; CHECK-LABEL: masked_trunc_store_sv2i64_to_sv2i32: -; CHECK-NEXT: st1w { z0.d }, p0, [x0, #5, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: st1w { z0.d }, p0, [x0, #5, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 5 %trunc = trunc %val to call void @llvm.masked.store.nxv2i32( %trunc, @@ -287,9 +305,10 @@ define void @masked_trunc_store_sv2i64_to_sv2i32( %val, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv4i8: -; CHECK-NEXT: ld1sb { z[[DATA:[0-9]+]].s }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: st1b { z[[DATA]].s }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: st1b { z0.s }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %data = call @llvm.masked.load.nxv4i8(* %base_load, i32 1, @@ -305,9 +324,10 @@ define void @test_masked_ldst_sv4i8( * %base, define void @test_masked_ldst_sv4i16( * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv4i16: -; CHECK-NEXT: ld1sh { z[[DATA:[0-9]+]].s }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: st1h { z[[DATA]].s }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %data = call @llvm.masked.load.nxv4i16(* 
%base_load, i32 1, @@ -323,9 +343,10 @@ define void @test_masked_ldst_sv4i16( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv4i32: -; CHECK-NEXT: ld1w { z[[DATA:[0-9]+]].s }, p0/z, [x0, #6, mul vl] -; CHECK-NEXT: st1w { z[[DATA]].s }, p0, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: st1w { z0.s }, p0, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 6 %data = call @llvm.masked.load.nxv4i32(* %base_load, i32 1, @@ -341,9 +362,10 @@ define void @test_masked_ldst_sv4i32( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv4f16: -; CHECK-NEXT: ld1h { z[[DATA:[0-9]+]].s }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: st1h { z[[DATA]].s }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %data = call @llvm.masked.load.nxv4f16(* %base_load, i32 1, @@ -359,9 +381,10 @@ define void @test_masked_ldst_sv4f16( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv4f32: -; CHECK-NEXT: ld1w { z[[DATA:[0-9]+]].s }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: st1w { z[[DATA]].s }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: st1w { z0.s }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %data = call @llvm.masked.load.nxv4f32(* %base_load, i32 1, @@ -379,8 +402,9 @@ define void @test_masked_ldst_sv4f32( * %base, @masked_zload_sv4i8_to_sv4i32(* %base, %mask) nounwind { ; CHECK-LABEL: masked_zload_sv4i8_to_sv4i32: -; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, #-4, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, #-4, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -4 %load = call 
@llvm.masked.load.nxv4i8(* %base_load, i32 1, @@ -392,8 +416,9 @@ define @masked_zload_sv4i8_to_sv4i32(* %base define @masked_sload_sv4i8_to_sv4i32(* %base, %mask) nounwind { ; CHECK-LABEL: masked_sload_sv4i8_to_sv4i32: -; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, #-3, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, #-3, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -3 %load = call @llvm.masked.load.nxv4i8(* %base_load, i32 1, @@ -405,8 +430,9 @@ define @masked_sload_sv4i8_to_sv4i32(* %base define @masked_zload_sv4i16_to_sv4i32(* %base, %mask) nounwind { ; CHECK-LABEL: masked_zload_sv4i16_to_sv4i32: -; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 1 %load = call @llvm.masked.load.nxv4i16(* %base_load, i32 1, @@ -418,8 +444,9 @@ define @masked_zload_sv4i16_to_sv4i32(* %ba define @masked_sload_sv4i16_to_sv4i32(* %base, %mask) nounwind { ; CHECK-LABEL: masked_sload_sv4i16_to_sv4i32: -; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 2 %load = call @llvm.masked.load.nxv4i16(* %base_load, i32 1, @@ -433,8 +460,9 @@ define @masked_sload_sv4i16_to_sv4i32(* %ba define void @masked_trunc_store_sv4i32_to_sv4i8( %val, *%base, %mask) nounwind { ; CHECK-LABEL: masked_trunc_store_sv4i32_to_sv4i8: -; CHECK-NEXT: st1b { z0.s }, p0, [x0, #3, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: st1b { z0.s }, p0, [x0, #3, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 3 %trunc = trunc %val to call void @llvm.masked.store.nxv4i8( %trunc, @@ -447,8 +475,9 @@ define void @masked_trunc_store_sv4i32_to_sv4i8( %val, %val, *%base, %mask) nounwind { ; CHECK-LABEL: 
masked_trunc_store_sv4i32_to_sv4i16: -; CHECK-NEXT: st1h { z0.s }, p0, [x0, #4, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: st1h { z0.s }, p0, [x0, #4, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 4 %trunc = trunc %val to call void @llvm.masked.store.nxv4i16( %trunc, @@ -462,9 +491,10 @@ define void @masked_trunc_store_sv4i32_to_sv4i16( %val, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv8i8: -; CHECK-NEXT: ld1sb { z[[DATA:[0-9]+]].h }, p0/z, [x0, #6, mul vl] -; CHECK-NEXT: st1b { z[[DATA]].h }, p0, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: st1b { z0.h }, p0, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 6 %data = call @llvm.masked.load.nxv8i8(* %base_load, i32 1, @@ -480,9 +510,10 @@ define void @test_masked_ldst_sv8i8( * %base, define void @test_masked_ldst_sv8i16( * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv8i16: -; CHECK-NEXT: ld1h { z[[DATA:[0-9]+]].h }, p0/z, [x0, #6, mul vl] -; CHECK-NEXT: st1h { z[[DATA]].h }, p0, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 6 %data = call @llvm.masked.load.nxv8i16(* %base_load, i32 1, @@ -498,9 +529,10 @@ define void @test_masked_ldst_sv8i16( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv8f16: -; CHECK-NEXT: ld1h { z[[DATA:[0-9]+]].h }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: st1h { z[[DATA]].h }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %data = call @llvm.masked.load.nxv8f16(* %base_load, i32 1, @@ -516,9 +548,10 @@ define void @test_masked_ldst_sv8f16( * 
%base, * %base, %mask) nounwind #0 { ; CHECK-LABEL: test_masked_ldst_sv8bf16: -; CHECK-NEXT: ld1h { z[[DATA:[0-9]+]].h }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: st1h { z[[DATA]].h }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %data = call @llvm.masked.load.nxv8bf16(* %base_load, i32 1, @@ -536,8 +569,9 @@ define void @test_masked_ldst_sv8bf16( * %base, @masked_zload_sv8i8_to_sv8i16(* %base, %mask) nounwind { ; CHECK-LABEL: masked_zload_sv8i8_to_sv8i16: -; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0, #-4, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0, #-4, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -4 %load = call @llvm.masked.load.nxv8i8(* %base_load, i32 1, @@ -549,8 +583,9 @@ define @masked_zload_sv8i8_to_sv8i16(* %base define @masked_sload_sv8i8_to_sv8i16(* %base, %mask) nounwind { ; CHECK-LABEL: masked_sload_sv8i8_to_sv8i16: -; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0, #-3, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0, #-3, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -3 %load = call @llvm.masked.load.nxv8i8(* %base_load, i32 1, @@ -564,8 +599,9 @@ define @masked_sload_sv8i8_to_sv8i16(* %base define void @masked_trunc_store_sv8i16_to_sv8i8( %val, *%base, %mask) nounwind { ; CHECK-LABEL: masked_trunc_store_sv8i16_to_sv8i8: -; CHECK-NEXT: st1b { z0.h }, p0, [x0, #3, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: st1b { z0.h }, p0, [x0, #3, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 3 %trunc = trunc %val to call void @llvm.masked.store.nxv8i8( %trunc, @@ -579,9 +615,10 @@ define void @masked_trunc_store_sv8i16_to_sv8i8( %val, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv16i8: -; CHECK-NEXT: ld1b { 
z[[DATA:[0-9]+]].b }, p0/z, [x0, #6, mul vl] -; CHECK-NEXT: st1b { z[[DATA]].b }, p0, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: st1b { z0.b }, p0, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 6 %data = call @llvm.masked.load.nxv16i8(* %base_load, i32 1, diff --git a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll index 6917d1d549ab4b..d4d0b965b235ff 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
@@ -11,13 +12,14 @@ define void @imm_out_of_range( * %base, %mask) nounwind { ; CHECK-LABEL: imm_out_of_range: -; CHECK-NEXT: rdvl x8, #8 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: ldnt1d { z[[DATA:[0-9]+]].d }, p0/z, [x{{[0-9]+}}] -; CHECK-NEXT: rdvl x8, #-9 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: stnt1d { z[[DATA]].d }, p0, [x{{[0-9]+}}] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x8, #8 +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: ldnt1d { z0.d }, p0/z, [x8] +; CHECK-NEXT: rdvl x8, #-9 +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: stnt1d { z0.d }, p0, [x8] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 8 %base_load_bc = bitcast * %base_load to i64* %data = call @llvm.aarch64.sve.ldnt1.nxv2i64( %mask, @@ -35,9 +37,10 @@ define void @imm_out_of_range( * %base, %mas define void @test_masked_ldst_sv2i64( * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2i64: -; CHECK-NEXT: ldnt1d { z[[DATA:[0-9]+]].d }, p0/z, [x0, #-8, mul vl] -; CHECK-NEXT: stnt1d { z[[DATA]].d }, p0, [x0, #-7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnt1d { z0.d }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: stnt1d { z0.d }, p0, [x0, #-7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -8 %base_load_bc = bitcast * %base_load to i64* %data = call @llvm.aarch64.sve.ldnt1.nxv2i64( %mask, @@ -52,9 +55,10 @@ define void @test_masked_ldst_sv2i64( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv2f64: -; CHECK-NEXT: ldnt1d { z[[DATA:[0-9]+]].d }, p0/z, [x0, #-6, mul vl] -; CHECK-NEXT: stnt1d { z[[DATA]].d }, p0, [x0, #-5, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnt1d { z0.d }, p0/z, [x0, #-6, mul vl] +; CHECK-NEXT: stnt1d { z0.d }, p0, [x0, #-5, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -6 %base_load_bc = bitcast * %base_load to double* %data = call @llvm.aarch64.sve.ldnt1.nxv2f64( %mask, @@ -71,9 +75,10 @@ define void @test_masked_ldst_sv2f64( * 
%base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv4i32: -; CHECK-NEXT: ldnt1w { z[[DATA:[0-9]+]].s }, p0/z, [x0, #6, mul vl] -; CHECK-NEXT: stnt1w { z[[DATA]].s }, p0, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: stnt1w { z0.s }, p0, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 6 %base_load_bc = bitcast * %base_load to i32* %data = call @llvm.aarch64.sve.ldnt1.nxv4i32( %mask, @@ -88,9 +93,10 @@ define void @test_masked_ldst_sv4i32( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv4f32: -; CHECK-NEXT: ldnt1w { z[[DATA:[0-9]+]].s }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: stnt1w { z[[DATA]].s }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: stnt1w { z0.s }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %base_load_bc = bitcast * %base_load to float* %data = call @llvm.aarch64.sve.ldnt1.nxv4f32( %mask, @@ -108,9 +114,10 @@ define void @test_masked_ldst_sv4f32( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv8i16: -; CHECK-NEXT: ldnt1h { z[[DATA:[0-9]+]].h }, p0/z, [x0, #6, mul vl] -; CHECK-NEXT: stnt1h { z[[DATA]].h }, p0, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnt1h { z0.h }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: stnt1h { z0.h }, p0, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 6 %base_load_bc = bitcast * %base_load to i16* %data = call @llvm.aarch64.sve.ldnt1.nxv8i16( %mask, @@ -125,9 +132,10 @@ define void @test_masked_ldst_sv8i16( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv8f16: -; CHECK-NEXT: ldnt1h { z[[DATA:[0-9]+]].h }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: stnt1h { z[[DATA]].h }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnt1h { z0.h }, p0/z, 
[x0, #-1, mul vl] +; CHECK-NEXT: stnt1h { z0.h }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %base_load_bc = bitcast * %base_load to half* %data = call @llvm.aarch64.sve.ldnt1.nxv8f16( %mask, @@ -142,9 +150,10 @@ define void @test_masked_ldst_sv8f16( * %base, * %base, %mask) nounwind #0 { ; CHECK-LABEL: test_masked_ldst_sv8bf16: -; CHECK-NEXT: ldnt1h { z[[DATA:[0-9]+]].h }, p0/z, [x0, #-1, mul vl] -; CHECK-NEXT: stnt1h { z[[DATA]].h }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnt1h { z0.h }, p0/z, [x0, #-1, mul vl] +; CHECK-NEXT: stnt1h { z0.h }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 -1 %base_load_bc = bitcast * %base_load to bfloat* %data = call @llvm.aarch64.sve.ldnt1.nxv8bf16( %mask, @@ -161,9 +170,10 @@ define void @test_masked_ldst_sv8bf16( * %base, * %base, %mask) nounwind { ; CHECK-LABEL: test_masked_ldst_sv16i8: -; CHECK-NEXT: ldnt1b { z[[DATA:[0-9]+]].b }, p0/z, [x0, #6, mul vl] -; CHECK-NEXT: stnt1b { z[[DATA]].b }, p0, [x0, #7, mul vl] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldnt1b { z0.b }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: stnt1b { z0.b }, p0, [x0, #7, mul vl] +; CHECK-NEXT: ret %base_load = getelementptr , * %base, i64 6 %base_load_bc = bitcast * %base_load to i8* %data = call @llvm.aarch64.sve.ldnt1.nxv16i8( %mask, From 348471575d9c24bbfb124ca5eac1589de075da88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Tue, 12 Jan 2021 13:15:21 -0500 Subject: [PATCH 42/86] Add -ansi option to CompileOnly group -ansi is documented as being the "same as -std=c89", but there are differences when passing it to a link. Adding -ansi to said group makes sense since it's supposed to be an alias for -std=c89 and resolves this inconsistency. 
--- clang/include/clang/Driver/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d9586e086a9cf1..b441c1b4c1692d 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -800,7 +800,7 @@ def Z_Flag : Flag<["-"], "Z">, Group; def Z_Joined : Joined<["-"], "Z">; def all__load : Flag<["-"], "all_load">; def allowable__client : Separate<["-"], "allowable_client">; -def ansi : Flag<["-", "--"], "ansi">; +def ansi : Flag<["-", "--"], "ansi">, Group; def arch__errors__fatal : Flag<["-"], "arch_errors_fatal">; def arch : Separate<["-"], "arch">, Flags<[NoXarchOption]>; def arch__only : Separate<["-"], "arch_only">; From b117d17d264f448e0b037a62f5a48ec9aedd886c Mon Sep 17 00:00:00 2001 From: Emil Engler Date: Wed, 6 Jan 2021 18:28:54 -0800 Subject: [PATCH 43/86] [doc] Place sha256 in lld/README.md into backticks Reviewed By: smeenai Differential Revision: https://reviews.llvm.org/D93984 --- lld/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/README.md b/lld/README.md index 3b8cd7a149480b..3b693c9957ee85 100644 --- a/lld/README.md +++ b/lld/README.md @@ -16,4 +16,4 @@ same tests, we create a collection of self contained programs. It is hosted at https://s3-us-west-2.amazonaws.com/linker-tests/lld-speed-test.tar.xz -The current sha256 is 10eec685463d5a8bbf08d77f4ca96282161d396c65bd97dc99dbde644a31610f. +The current sha256 is `10eec685463d5a8bbf08d77f4ca96282161d396c65bd97dc99dbde644a31610f`. 
From ef3800e82169c674219501d9ac09ef12b28e6359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Tue, 12 Jan 2021 13:18:13 -0500 Subject: [PATCH 44/86] Return false from __has_declspec_attribute() if not explicitly enabled Currently, projects can check for __has_declspec_attribute() and use it accordingly, but the check for __has_declspec_attribute will return true even if declspec attributes are not enabled for the target. This changes Clang to instead return false when declspec attributes are not supported for the target. --- clang/lib/Lex/PPMacroExpansion.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 3969630f200202..43d31d6c5732e5 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1693,8 +1693,14 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, diag::err_feature_check_malformed); - return II ? hasAttribute(AttrSyntax::Declspec, nullptr, II, - getTargetInfo(), getLangOpts()) : 0; + if (II) { + const LangOptions &LangOpts = getLangOpts(); + return LangOpts.DeclSpecKeyword && + hasAttribute(AttrSyntax::Declspec, nullptr, II, + getTargetInfo(), LangOpts); + } + + return false; }); } else if (II == Ident__has_cpp_attribute || II == Ident__has_c_attribute) { From 5aefc8dc4d14ad04259ab8ae0b2e0da2596d66f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 12 Jan 2021 18:16:57 +0100 Subject: [PATCH 45/86] [llvm] [cmake] Remove obsolete /usr/local hack for *BSD Remove the hack adding /usr/local paths on FreeBSD and DragonFlyBSD. It does not seem to be necessary today, and it breaks cross builds. 
Differential Revision: https://reviews.llvm.org/D94491 --- llvm/CMakeLists.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index ee1b646ab651f8..26a7029afefd5b 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -911,13 +911,6 @@ if(LLVM_TARGET_IS_CROSSCOMPILE_HOST) # (this is a variable that CrossCompile sets on recursive invocations) endif() -if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") - # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM - # with libxml2, iconv.h, etc., we must add /usr/local paths. - include_directories(SYSTEM "/usr/local/include") - link_directories("/usr/local/lib") -endif(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") - if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) # special hack for Solaris to handle crazy system sys/regset.h include_directories("${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/Solaris") From bb9ebf6baf7057d7f2aed90fccbac2414cf9a134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Tue, 12 Jan 2021 18:56:49 +0100 Subject: [PATCH 46/86] [Tests] Add tests for new InstCombine OR transformation, NFC --- llvm/test/Transforms/InstCombine/or.ll | 114 +++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index b5e3af2c765254..d41b8d53dd40ce 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n32:64" +declare void @use(i32) define i32 @test12(i32 %A) { ; Should be eliminated @@ -1000,3 +1001,116 @@ end: %conv8 = zext i1 %t5 to i32 ret i32 %conv8 } + +define i32 @test1(i32 %x, i32 %y) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: 
[[OR:%.*]] = or i32 [[Y]], [[X]] +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: ret i32 [[OR1]] +; + %xor = xor i32 %y, %x + %or = or i32 %y, %x + %neg = xor i32 %or, -1 + %or1 = or i32 %xor, %neg + ret i32 %or1 +} + +define i32 @test2(i32 %x, i32 %y) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: ret i32 [[OR1]] +; + %or = or i32 %y, %x + %neg = xor i32 %or, -1 + %xor = xor i32 %y, %x + %or1 = or i32 %xor, %neg + ret i32 %or1 +} + +define i32 @test3(i32 %x, i32 %y) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: ret i32 [[OR1]] +; + %or = or i32 %y, %x + %neg = xor i32 %or, -1 + %xor = xor i32 %y, %x + %or1 = or i32 %xor, %neg + ret i32 %or1 +} + +define <2 x i32> @test4_vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @test4_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[OR]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y]], [[X]] +; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[XOR]], [[NEG]] +; CHECK-NEXT: ret <2 x i32> [[OR1]] +; + %or = or <2 x i32> %y, %x + %neg = xor <2 x i32> %or, + %xor = xor <2 x i32> %y, %x + %or1 = or <2 x i32> %xor, %neg + ret <2 x i32> %or1 +} + +define i32 @test5_use(i32 %x, i32 %y) { +; CHECK-LABEL: @test5_use( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: call void @use(i32 [[NEG]]) +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: ret i32 [[OR1]] +; + %or = or i32 %y, 
%x + %neg = xor i32 %or, -1 + %xor = xor i32 %y, %x + call void @use(i32 %neg) + %or1 = or i32 %xor, %neg + ret i32 %or1 +} + +define i32 @test5_use2(i32 %x, i32 %y) { +; CHECK-LABEL: @test5_use2( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: call void @use(i32 [[XOR]]) +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: ret i32 [[OR1]] +; + %or = or i32 %y, %x + %neg = xor i32 %or, -1 + %xor = xor i32 %y, %x + call void @use(i32 %xor) + %or1 = or i32 %xor, %neg + ret i32 %or1 +} +define i32 @test5_use3(i32 %x, i32 %y) { +; CHECK-LABEL: @test5_use3( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 +; CHECK-NEXT: call void @use(i32 [[NEG]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: call void @use(i32 [[XOR]]) +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: ret i32 [[OR1]] +; + %or = or i32 %y, %x + %neg = xor i32 %or, -1 + call void @use(i32 %neg) + %xor = xor i32 %y, %x + call void @use(i32 %xor) + %or1 = or i32 %xor, %neg + ret i32 %or1 +} + From 0529946b5bafafd10d77b946ee9fa96f388860ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Tue, 12 Jan 2021 19:28:01 +0100 Subject: [PATCH 47/86] [instCombine] Add (A ^ B) | ~(A | B) -> ~(A & B) define i32 @src(i32 %x, i32 %y) { %0: %xor = xor i32 %y, %x %or = or i32 %y, %x %neg = xor i32 %or, 4294967295 %or1 = or i32 %xor, %neg ret i32 %or1 } => define i32 @tgt(i32 %x, i32 %y) { %0: %and = and i32 %x, %y %neg = xor i32 %and, 4294967295 ret i32 %neg } Transformation seems to be correct! 
https://alive2.llvm.org/ce/z/Cvca4a --- .../InstCombine/InstCombineAndOrXor.cpp | 8 ++++ llvm/test/Transforms/InstCombine/or.ll | 39 +++++++------------ 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 15dcf2d19c1521..352126fa07ca24 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1627,6 +1627,14 @@ static Instruction *foldOrToXor(BinaryOperator &I, match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + // Operand complexity canonicalization guarantees that the 'xor' is Op0. + // (A ^ B) | ~(A | B) --> ~(A & B) + // (A ^ B) | ~(B | A) --> ~(A & B) + if (Op0->hasOneUse() || Op1->hasOneUse()) + if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); + // (A & ~B) | (~A & B) --> A ^ B // (A & ~B) | (B & ~A) --> A ^ B // (~B & A) | (~A & B) --> A ^ B diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index d41b8d53dd40ce..b5da1734c10200 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -1004,10 +1004,8 @@ end: define i32 @test1(i32 %x, i32 %y) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]] -; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR1]] ; %xor = xor i32 %y, %x @@ -1019,13 +1017,11 @@ define i32 @test1(i32 %x, i32 %y) { define i32 @test2(i32 %x, i32 %y) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[OR:%.*]] = or i32 
[[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR1]] ; - %or = or i32 %y, %x + %or = or i32 %x, %y %neg = xor i32 %or, -1 %xor = xor i32 %y, %x %or1 = or i32 %xor, %neg @@ -1034,25 +1030,21 @@ define i32 @test2(i32 %x, i32 %y) { define i32 @test3(i32 %x, i32 %y) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR1]] ; %or = or i32 %y, %x %neg = xor i32 %or, -1 - %xor = xor i32 %y, %x + %xor = xor i32 %x, %y %or1 = or i32 %xor, %neg ret i32 %or1 } define <2 x i32> @test4_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test4_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[OR]], -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y]], [[X]] -; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[XOR]], [[NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i32> [[OR1]] ; %or = or <2 x i32> %y, %x @@ -1066,9 +1058,9 @@ define i32 @test5_use(i32 %x, i32 %y) { ; CHECK-LABEL: @test5_use( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] ; CHECK-NEXT: call void @use(i32 [[NEG]]) -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y]], [[X]] +; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR1]] ; %or = or 
i32 %y, %x @@ -1081,11 +1073,10 @@ define i32 @test5_use(i32 %x, i32 %y) { define i32 @test5_use2(i32 %x, i32 %y) { ; CHECK-LABEL: @test5_use2( -; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[OR]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: call void @use(i32 [[XOR]]) -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y]], [[X]] +; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR1]] ; %or = or i32 %y, %x From 6f4d460762006af17826693abc1e7139a76aa1f2 Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Thu, 17 Dec 2020 14:28:03 +0530 Subject: [PATCH 48/86] [Flang][openmp][openacc] Extend CheckNoBranching to handle branching provided by LabelEnforce. `CheckNoBranching` is currently handling only illegal branching out for constructs with `Parser::Name` in them. Extend the same for handling illegal branching out caused by `Parser::Label` based statements. This patch could possibly solve one of the issues(typically branching out) mentioned in D92735. 
Reviewed By: kiranchandramohan Differential Revision: https://reviews.llvm.org/D93447 --- flang/lib/Semantics/check-directive-structure.h | 15 ++++++++++++++- flang/lib/Semantics/check-omp-structure.cpp | 9 +-------- flang/test/Semantics/omp-parallell01.f90 | 3 +-- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/flang/lib/Semantics/check-directive-structure.h b/flang/lib/Semantics/check-directive-structure.h index 062f85b63b85a5..1075087feb4fbf 100644 --- a/flang/lib/Semantics/check-directive-structure.h +++ b/flang/lib/Semantics/check-directive-structure.h @@ -15,7 +15,6 @@ #include "flang/Common/enum-set.h" #include "flang/Semantics/semantics.h" #include "flang/Semantics/tools.h" - #include namespace Fortran::semantics { @@ -43,6 +42,9 @@ template class NoBranchingEnforce { template bool Pre(const parser::Statement &statement) { currentStatementSourcePosition_ = statement.source; + if (statement.label.has_value()) { + labels_.insert(*statement.label); + } return true; } @@ -54,6 +56,8 @@ template class NoBranchingEnforce { } void Post(const parser::StopStmt &) { EmitBranchOutError("STOP"); } + std::set labels() { return labels_; } + private: parser::MessageFormattedText GetEnclosingMsg() const { return {"Enclosing %s construct"_en_US, upperCaseDirName_}; @@ -103,6 +107,7 @@ template class NoBranchingEnforce { parser::CharBlock sourcePosition_; std::string upperCaseDirName_; D currentDirective_; + std::set labels_; }; // Generic structure checker for directives/clauses language such as OpenMP @@ -226,6 +231,9 @@ class DirectiveStructureChecker : public virtual BaseChecker { SayNotMatching(beginDir.source, endDir.source); } } + // Check illegal branching out of `Parser::Block` for `Parser::Name` based + // nodes (examples `Parser::ExitStmt`) along with `Parser::Label` + // based nodes (example `Parser::GotoStmt`). 
void CheckNoBranching(const parser::Block &block, D directive, const parser::CharBlock &directiveSource); @@ -271,6 +279,11 @@ void DirectiveStructureChecker::CheckNoBranching( NoBranchingEnforce noBranchingEnforce{ context_, directiveSource, directive, ContextDirectiveAsFortran()}; parser::Walk(block, noBranchingEnforce); + + LabelEnforce directiveLabelEnforce{context_, noBranchingEnforce.labels(), + directiveSource, + parser::ToUpperCaseLetters(getDirectiveName(directive).str()).c_str()}; + parser::Walk(block, directiveLabelEnforce); } // Check that only clauses included in the given set are present after the given diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 4d1c96f66905c0..773f5b2aeb219b 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -125,14 +125,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { CheckMatching(beginDir, endDir); PushContextAndClauseSets(beginDir.source, beginDir.v); - - switch (beginDir.v) { - case llvm::omp::OMPD_parallel: - CheckNoBranching(block, llvm::omp::OMPD_parallel, beginDir.source); - break; - default: - break; - } + CheckNoBranching(block, beginDir.v, beginDir.source); } void OmpStructureChecker::Leave(const parser::OpenMPBlockConstruct &) { diff --git a/flang/test/Semantics/omp-parallell01.f90 b/flang/test/Semantics/omp-parallell01.f90 index e3490563f332a5..1a2cae1830bc77 100644 --- a/flang/test/Semantics/omp-parallell01.f90 +++ b/flang/test/Semantics/omp-parallell01.f90 @@ -1,5 +1,4 @@ ! RUN: %S/test_errors.sh %s %t %f18 -fopenmp -! XFAIL: * ! OpenMP Version 4.5 ! 2.5 parallel construct. 
@@ -13,7 +12,7 @@ program omp_parallel do i = 1, 10 do j = 1, 10 print *, "Hello" - !ERROR: invalid branch to/from OpenMP structured block + !ERROR: Control flow escapes from PARALLEL goto 10 end do end do From 03c8d6a0c4bd0016bdfd1e53e6878696fe6412ed Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 12 Jan 2021 09:52:00 -0800 Subject: [PATCH 49/86] [LegalizeDAG][RISCV][PowerPC][AMDGPU][WebAssembly] Improve expansion of SETONE/SETUEQ on targets without SETO/SETUO. If SETO/SETUO aren't legal, they'll be expanded and we'll end up with 3 comparisons. SETONE is equivalent to (SETOGT || SETOLT) so if one of those operations is supported use that expansion. We don't need both since we can commute the operands to make the other. SETUEQ can be implemented with !(SETOGT || SETOLT) or (SETULE && SETUGE). I've only implemented the first because it didn't look like most of the affected targets had legal SETULE/SETUGE. Reviewed By: frasercrmck, tlively, nemanjai Differential Revision: https://reviews.llvm.org/D94450 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 19 ++- llvm/test/CodeGen/AMDGPU/setcc.ll | 16 +- llvm/test/CodeGen/PowerPC/spe.ll | 49 +++--- llvm/test/CodeGen/PowerPC/vsx.ll | 40 ++--- llvm/test/CodeGen/RISCV/double-br-fcmp.ll | 53 +++--- llvm/test/CodeGen/RISCV/double-fcmp.ll | 48 +++--- llvm/test/CodeGen/RISCV/double-select-fcmp.ll | 51 +++--- llvm/test/CodeGen/RISCV/float-br-fcmp.ll | 51 +++--- llvm/test/CodeGen/RISCV/float-fcmp.ll | 46 ++---- llvm/test/CodeGen/RISCV/float-select-fcmp.ll | 43 ++--- llvm/test/CodeGen/RISCV/half-br-fcmp.ll | 43 ++--- llvm/test/CodeGen/RISCV/half-fcmp.ll | 38 ++--- llvm/test/CodeGen/RISCV/half-select-fcmp.ll | 39 ++--- llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll | 156 ++++++------------ llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll | 156 ++++++------------ .../CodeGen/WebAssembly/comparisons-f32.ll | 28 ++-- .../CodeGen/WebAssembly/comparisons-f64.ll | 28 ++-- .../CodeGen/WebAssembly/simd-comparisons.ll | 68 ++++---- 18 
files changed, 371 insertions(+), 601 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9e1ea7c81a3581..523895200f6a40 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1741,13 +1741,28 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode( assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; + case ISD::SETONE: + case ISD::SETUEQ: + // If the SETUO or SETO CC isn't legal, we might be able to use + // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one + // of SETOGT/SETOLT to be legal, the other can be emulated by swapping + // the operands. + CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; + if (!TLI.isCondCodeLegal(CC2, OpVT) && + (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) || + TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) { + CC1 = ISD::SETOGT; + CC2 = ISD::SETOLT; + Opc = ISD::OR; + NeedInvert = ((unsigned)CCCode & 0x8U); + break; + } + LLVM_FALLTHROUGH; case ISD::SETOEQ: case ISD::SETOGT: case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: - case ISD::SETONE: - case ISD::SETUEQ: case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE: diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll index a259784bc278f8..e888ceb94cfa9b 100644 --- a/llvm/test/CodeGen/AMDGPU/setcc.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc.ll @@ -96,11 +96,9 @@ entry: } ; FUNC-LABEL: {{^}}f32_one: -; R600-DAG: SETE_DX10 -; R600-DAG: SETE_DX10 -; R600-DAG: AND_INT -; R600-DAG: SETNE_DX10 -; R600-DAG: AND_INT +; R600-DAG: SETGT_DX10 +; R600-DAG: SETGT_DX10 +; R600-DAG: OR_INT ; R600-DAG: SETNE_INT ; GCN: v_cmp_lg_f32_e32 vcc @@ -128,12 +126,10 @@ entry: } ; FUNC-LABEL: {{^}}f32_ueq: -; R600-DAG: SETNE_DX10 -; R600-DAG: SETNE_DX10 -; R600-DAG: OR_INT -; R600-DAG: SETE_DX10 +; R600-DAG: SETGT_DX10 +; R600-DAG: 
SETGT_DX10 ; R600-DAG: OR_INT -; R600-DAG: SETNE_INT +; R600-DAG: SETE_INT ; GCN: v_cmp_nlg_f32_e32 vcc ; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index 59bc6abc4f759c..6ab05554aa8183 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -297,12 +297,10 @@ define i1 @test_fcmpord(float %a, float %b) #0 { define i1 @test_fcmpueq(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpueq: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efscmpeq 0, 3, 3 -; CHECK-NEXT: efscmpeq 1, 4, 4 -; CHECK-NEXT: crnand 20, 5, 1 -; CHECK-NEXT: efscmpeq 0, 3, 4 +; CHECK-NEXT: efscmpgt 0, 3, 4 +; CHECK-NEXT: efscmplt 1, 3, 4 ; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: crnor 20, 1, 20 +; CHECK-NEXT: cror 20, 5, 1 ; CHECK-NEXT: bc 12, 20, .LBB14_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 @@ -318,12 +316,10 @@ define i1 @test_fcmpueq(float %a, float %b) #0 { define i1 @test_fcmpne(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpne: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efscmpeq 0, 4, 4 -; CHECK-NEXT: efscmpeq 1, 3, 3 -; CHECK-NEXT: crand 20, 5, 1 -; CHECK-NEXT: efscmpeq 0, 3, 4 +; CHECK-NEXT: efscmplt 0, 3, 4 +; CHECK-NEXT: efscmpgt 1, 3, 4 ; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: crorc 20, 1, 20 +; CHECK-NEXT: crnor 20, 5, 1 ; CHECK-NEXT: bc 12, 20, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 @@ -1117,22 +1113,19 @@ define i32 @test_dcmpueq(double %a, double %b) #0 { ; SPE-LABEL: test_dcmpueq: ; SPE: # %bb.0: # %entry ; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 ; SPE-NEXT: evmergelo 3, 3, 4 -; SPE-NEXT: evmergelo 4, 5, 6 -; SPE-NEXT: efdcmpeq 0, 4, 4 -; SPE-NEXT: bc 4, 1, .LBB16_4 +; SPE-NEXT: efdcmplt 0, 3, 5 +; SPE-NEXT: bc 12, 1, .LBB16_3 ; SPE-NEXT: # %bb.1: # %entry -; SPE-NEXT: efdcmpeq 0, 3, 3 -; SPE-NEXT: bc 4, 1, .LBB16_4 -; SPE-NEXT: # %bb.2: # %entry -; SPE-NEXT: efdcmpeq 0, 3, 4 -; SPE-NEXT: bc 12, 1, .LBB16_4 -; 
SPE-NEXT: # %bb.3: # %fa -; SPE-NEXT: li 3, 0 -; SPE-NEXT: b .LBB16_5 -; SPE-NEXT: .LBB16_4: # %tr +; SPE-NEXT: efdcmpgt 0, 3, 5 +; SPE-NEXT: bc 12, 1, .LBB16_3 +; SPE-NEXT: # %bb.2: # %tr ; SPE-NEXT: li 3, 1 -; SPE-NEXT: .LBB16_5: # %ret +; SPE-NEXT: b .LBB16_4 +; SPE-NEXT: .LBB16_3: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB16_4: # %ret ; SPE-NEXT: stw 3, 12(1) ; SPE-NEXT: lwz 3, 12(1) ; SPE-NEXT: addi 1, 1, 16 @@ -1208,14 +1201,12 @@ ret: define i1 @test_dcmpne(double %a, double %b) #0 { ; SPE-LABEL: test_dcmpne: ; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 ; SPE-NEXT: evmergelo 3, 3, 4 -; SPE-NEXT: evmergelo 4, 5, 6 ; SPE-NEXT: li 7, 1 -; SPE-NEXT: efdcmpeq 0, 4, 4 -; SPE-NEXT: efdcmpeq 1, 3, 3 -; SPE-NEXT: efdcmpeq 5, 3, 4 -; SPE-NEXT: crand 24, 5, 1 -; SPE-NEXT: crorc 20, 21, 24 +; SPE-NEXT: efdcmplt 0, 3, 5 +; SPE-NEXT: efdcmpgt 1, 3, 5 +; SPE-NEXT: crnor 20, 5, 1 ; SPE-NEXT: bc 12, 20, .LBB17_2 ; SPE-NEXT: # %bb.1: # %entry ; SPE-NEXT: ori 3, 7, 0 diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index 6349523bc395f7..73cca71b539c35 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -624,49 +624,33 @@ entry: define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvcmpeqsp vs0, v5, v5 -; CHECK-NEXT: xvcmpeqsp vs1, v4, v4 -; CHECK-NEXT: xvcmpeqsp vs2, v4, v5 -; CHECK-NEXT: xxlnor vs0, vs0, vs0 -; CHECK-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-NEXT: xxlor vs0, vs1, vs0 -; CHECK-NEXT: xxlor vs0, vs2, vs0 +; CHECK-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-NEXT: xxlnor vs0, vs1, vs0 ; CHECK-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test22: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xvcmpeqsp vs0, v5, v5 -; CHECK-REG-NEXT: xvcmpeqsp vs1, v4, v4 -; CHECK-REG-NEXT: xvcmpeqsp vs2, v4, v5 -; CHECK-REG-NEXT: xxlnor vs0, vs0, vs0 
-; CHECK-REG-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-REG-NEXT: xxlor vs0, vs1, vs0 -; CHECK-REG-NEXT: xxlor vs0, vs2, vs0 +; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-REG-NEXT: xxlnor vs0, vs1, vs0 ; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test22: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xvcmpeqsp vs0, v4, v5 -; CHECK-FISL-NEXT: xvcmpeqsp vs1, v5, v5 -; CHECK-FISL-NEXT: xxlnor vs2, vs1, vs1 -; CHECK-FISL-NEXT: xvcmpeqsp vs1, v4, v4 -; CHECK-FISL-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-FISL-NEXT: xxlor vs1, vs1, vs2 -; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1 +; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4 +; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5 +; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs1 ; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test22: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xvcmpeqsp vs0, v5, v5 -; CHECK-LE-NEXT: xvcmpeqsp vs1, v4, v4 -; CHECK-LE-NEXT: xvcmpeqsp vs2, v4, v5 -; CHECK-LE-NEXT: xxlnor vs0, vs0, vs0 -; CHECK-LE-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-LE-NEXT: xxlor vs0, vs1, vs0 -; CHECK-LE-NEXT: xxlor vs0, vs2, vs0 +; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-LE-NEXT: xxlnor vs0, vs1, vs0 ; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-LE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll index e7cfa8d9af0774..94744a22364dc9 100644 --- a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll @@ -305,24 +305,20 @@ if.then: unreachable } -; TODO: feq.s+sltiu+bne -> feq.s+beq define void @br_fcmp_one(double %a, double %b) nounwind { ; RV32IFD-LABEL: br_fcmp_one: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw a0, 0(sp) -; RV32IFD-NEXT: sw a1, 4(sp) -; RV32IFD-NEXT: fld ft0, 0(sp) ; RV32IFD-NEXT: sw a2, 0(sp) ; 
RV32IFD-NEXT: sw a3, 4(sp) +; RV32IFD-NEXT: fld ft0, 0(sp) +; RV32IFD-NEXT: sw a0, 0(sp) +; RV32IFD-NEXT: sw a1, 4(sp) ; RV32IFD-NEXT: fld ft1, 0(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft1 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: and a0, a1, a0 -; RV32IFD-NEXT: feq.d a1, ft0, ft1 -; RV32IFD-NEXT: not a1, a1 -; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 ; RV32IFD-NEXT: bnez a0, .LBB7_2 ; RV32IFD-NEXT: # %bb.1: # %if.else ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -335,14 +331,11 @@ define void @br_fcmp_one(double %a, double %b) nounwind { ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: addi sp, sp, -16 ; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fmv.d.x ft1, a1 -; RV64IFD-NEXT: feq.d a0, ft1, ft1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: and a0, a1, a0 -; RV64IFD-NEXT: feq.d a1, ft0, ft1 -; RV64IFD-NEXT: not a1, a1 -; RV64IFD-NEXT: and a0, a1, a0 +; RV64IFD-NEXT: fmv.d.x ft0, a1 +; RV64IFD-NEXT: fmv.d.x ft1, a0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: flt.d a1, ft0, ft1 +; RV64IFD-NEXT: or a0, a1, a0 ; RV64IFD-NEXT: bnez a0, .LBB7_2 ; RV64IFD-NEXT: # %bb.1: # %if.else ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -417,13 +410,11 @@ define void @br_fcmp_ueq(double %a, double %b) nounwind { ; RV32IFD-NEXT: sw a0, 0(sp) ; RV32IFD-NEXT: sw a1, 4(sp) ; RV32IFD-NEXT: fld ft1, 0(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft0 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: feq.d a2, ft1, ft1 -; RV32IFD-NEXT: and a1, a2, a1 -; RV32IFD-NEXT: xori a1, a1, 1 -; RV32IFD-NEXT: or a0, a0, a1 -; RV32IFD-NEXT: bnez a0, .LBB9_2 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: addi a1, zero, 1 +; RV32IFD-NEXT: bne a0, a1, .LBB9_2 ; RV32IFD-NEXT: # %bb.1: # %if.else ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: 
addi sp, sp, 16 @@ -437,13 +428,11 @@ define void @br_fcmp_ueq(double %a, double %b) nounwind { ; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IFD-NEXT: fmv.d.x ft0, a1 ; RV64IFD-NEXT: fmv.d.x ft1, a0 -; RV64IFD-NEXT: feq.d a0, ft1, ft0 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: feq.d a2, ft1, ft1 -; RV64IFD-NEXT: and a1, a2, a1 -; RV64IFD-NEXT: xori a1, a1, 1 -; RV64IFD-NEXT: or a0, a0, a1 -; RV64IFD-NEXT: bnez a0, .LBB9_2 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: flt.d a1, ft0, ft1 +; RV64IFD-NEXT: or a0, a1, a0 +; RV64IFD-NEXT: addi a1, zero, 1 +; RV64IFD-NEXT: bne a0, a1, .LBB9_2 ; RV64IFD-NEXT: # %bb.1: # %if.else ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll index 0613dc6b021203..1caec05cf9a9d3 100644 --- a/llvm/test/CodeGen/RISCV/double-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll @@ -148,31 +148,25 @@ define i32 @fcmp_one(double %a, double %b) nounwind { ; RV32IFD-LABEL: fcmp_one: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw a0, 8(sp) -; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: fld ft0, 8(sp) ; RV32IFD-NEXT: sw a2, 8(sp) ; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft1 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: and a0, a1, a0 -; RV32IFD-NEXT: feq.d a1, ft0, ft1 -; RV32IFD-NEXT: not a1, a1 -; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcmp_one: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fmv.d.x ft1, a1 -; RV64IFD-NEXT: feq.d a0, ft1, ft1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: and a0, a1, a0 -; RV64IFD-NEXT: feq.d 
a1, ft0, ft1 -; RV64IFD-NEXT: not a1, a1 -; RV64IFD-NEXT: and a0, a1, a0 +; RV64IFD-NEXT: fmv.d.x ft0, a1 +; RV64IFD-NEXT: fmv.d.x ft1, a0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: flt.d a1, ft0, ft1 +; RV64IFD-NEXT: or a0, a1, a0 ; RV64IFD-NEXT: ret %1 = fcmp one double %a, %b %2 = zext i1 %1 to i32 @@ -218,12 +212,10 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind { ; RV32IFD-NEXT: sw a0, 8(sp) ; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft0 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: feq.d a2, ft1, ft1 -; RV32IFD-NEXT: and a1, a2, a1 -; RV32IFD-NEXT: xori a1, a1, 1 -; RV32IFD-NEXT: or a0, a0, a1 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -231,12 +223,10 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind { ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fmv.d.x ft0, a1 ; RV64IFD-NEXT: fmv.d.x ft1, a0 -; RV64IFD-NEXT: feq.d a0, ft1, ft0 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: feq.d a2, ft1, ft1 -; RV64IFD-NEXT: and a1, a2, a1 -; RV64IFD-NEXT: xori a1, a1, 1 -; RV64IFD-NEXT: or a0, a0, a1 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: flt.d a1, ft0, ft1 +; RV64IFD-NEXT: or a0, a1, a0 +; RV64IFD-NEXT: xori a0, a0, 1 ; RV64IFD-NEXT: ret %1 = fcmp ueq double %a, %b %2 = zext i1 %1 to i32 diff --git a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll index 3177175ef3058a..0481ca9ba90e90 100644 --- a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll @@ -206,27 +206,23 @@ define double @select_fcmp_ole(double %a, double %b) nounwind { } define double @select_fcmp_one(double %a, double %b) nounwind { -; TODO: feq.s+sltiu+bne sequence could be optimised ; RV32IFD-LABEL: select_fcmp_one: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 -; 
RV32IFD-NEXT: sw a0, 8(sp) -; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: fld ft0, 8(sp) ; RV32IFD-NEXT: sw a2, 8(sp) ; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft1 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: and a0, a1, a0 -; RV32IFD-NEXT: feq.d a1, ft0, ft1 -; RV32IFD-NEXT: not a1, a1 -; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 ; RV32IFD-NEXT: bnez a0, .LBB6_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: fmv.d ft0, ft1 +; RV32IFD-NEXT: fmv.d ft1, ft0 ; RV32IFD-NEXT: .LBB6_2: -; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: fsd ft1, 8(sp) ; RV32IFD-NEXT: lw a0, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: addi sp, sp, 16 @@ -234,14 +230,11 @@ define double @select_fcmp_one(double %a, double %b) nounwind { ; ; RV64IFD-LABEL: select_fcmp_one: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a0 ; RV64IFD-NEXT: fmv.d.x ft1, a1 -; RV64IFD-NEXT: feq.d a0, ft1, ft1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: and a0, a1, a0 -; RV64IFD-NEXT: feq.d a1, ft0, ft1 -; RV64IFD-NEXT: not a1, a1 -; RV64IFD-NEXT: and a0, a1, a0 +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: flt.d a0, ft0, ft1 +; RV64IFD-NEXT: flt.d a1, ft1, ft0 +; RV64IFD-NEXT: or a0, a1, a0 ; RV64IFD-NEXT: bnez a0, .LBB6_2 ; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fmv.d ft0, ft1 @@ -304,12 +297,10 @@ define double @select_fcmp_ueq(double %a, double %b) nounwind { ; RV32IFD-NEXT: sw a0, 8(sp) ; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft0 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: feq.d a2, ft1, ft1 -; RV32IFD-NEXT: and a1, a2, a1 -; RV32IFD-NEXT: xori a1, a1, 1 -; RV32IFD-NEXT: or a0, a0, a1 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 +; 
RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: bnez a0, .LBB8_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fmv.d ft1, ft0 @@ -324,12 +315,10 @@ define double @select_fcmp_ueq(double %a, double %b) nounwind { ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fmv.d.x ft1, a1 ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: feq.d a0, ft0, ft1 -; RV64IFD-NEXT: feq.d a1, ft1, ft1 -; RV64IFD-NEXT: feq.d a2, ft0, ft0 -; RV64IFD-NEXT: and a1, a2, a1 -; RV64IFD-NEXT: xori a1, a1, 1 -; RV64IFD-NEXT: or a0, a0, a1 +; RV64IFD-NEXT: flt.d a0, ft0, ft1 +; RV64IFD-NEXT: flt.d a1, ft1, ft0 +; RV64IFD-NEXT: or a0, a1, a0 +; RV64IFD-NEXT: xori a0, a0, 1 ; RV64IFD-NEXT: bnez a0, .LBB8_2 ; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fmv.d ft0, ft1 diff --git a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll index b4a2783c6561e7..3ef15ccb933770 100644 --- a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll @@ -282,20 +282,16 @@ if.then: unreachable } -; TODO: feq.s+sltiu+bne -> feq.s+beq define void @br_fcmp_one(float %a, float %b) nounwind { ; RV32IF-LABEL: br_fcmp_one: ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: fmv.w.x ft1, a1 -; RV32IF-NEXT: feq.s a0, ft1, ft1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: feq.s a1, ft0, ft1 -; RV32IF-NEXT: not a1, a1 -; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: fmv.w.x ft0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: flt.s a1, ft0, ft1 +; RV32IF-NEXT: or a0, a1, a0 ; RV32IF-NEXT: bnez a0, .LBB7_2 ; RV32IF-NEXT: # %bb.1: # %if.else ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -308,14 +304,11 @@ define void @br_fcmp_one(float %a, float %b) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fmv.w.x 
ft1, a1 -; RV64IF-NEXT: feq.s a0, ft1, ft1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: and a0, a1, a0 -; RV64IF-NEXT: feq.s a1, ft0, ft1 -; RV64IF-NEXT: not a1, a1 -; RV64IF-NEXT: and a0, a1, a0 +; RV64IF-NEXT: fmv.w.x ft0, a1 +; RV64IF-NEXT: fmv.w.x ft1, a0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: flt.s a1, ft0, ft1 +; RV64IF-NEXT: or a0, a1, a0 ; RV64IF-NEXT: bnez a0, .LBB7_2 ; RV64IF-NEXT: # %bb.1: # %if.else ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -382,13 +375,11 @@ define void @br_fcmp_ueq(float %a, float %b) nounwind { ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.w.x ft0, a1 ; RV32IF-NEXT: fmv.w.x ft1, a0 -; RV32IF-NEXT: feq.s a0, ft1, ft0 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: feq.s a2, ft1, ft1 -; RV32IF-NEXT: and a1, a2, a1 -; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: or a0, a0, a1 -; RV32IF-NEXT: bnez a0, .LBB9_2 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: flt.s a1, ft0, ft1 +; RV32IF-NEXT: or a0, a1, a0 +; RV32IF-NEXT: addi a1, zero, 1 +; RV32IF-NEXT: bne a0, a1, .LBB9_2 ; RV32IF-NEXT: # %bb.1: # %if.else ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -402,13 +393,11 @@ define void @br_fcmp_ueq(float %a, float %b) nounwind { ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IF-NEXT: fmv.w.x ft0, a1 ; RV64IF-NEXT: fmv.w.x ft1, a0 -; RV64IF-NEXT: feq.s a0, ft1, ft0 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: feq.s a2, ft1, ft1 -; RV64IF-NEXT: and a1, a2, a1 -; RV64IF-NEXT: xori a1, a1, 1 -; RV64IF-NEXT: or a0, a0, a1 -; RV64IF-NEXT: bnez a0, .LBB9_2 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: flt.s a1, ft0, ft1 +; RV64IF-NEXT: or a0, a1, a0 +; RV64IF-NEXT: addi a1, zero, 1 +; RV64IF-NEXT: bne a0, a1, .LBB9_2 ; RV64IF-NEXT: # %bb.1: # %if.else ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/float-fcmp.ll b/llvm/test/CodeGen/RISCV/float-fcmp.ll index 
06dbb362537c33..c19013fce4b082 100644 --- a/llvm/test/CodeGen/RISCV/float-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-fcmp.ll @@ -117,26 +117,20 @@ define i32 @fcmp_ole(float %a, float %b) nounwind { define i32 @fcmp_one(float %a, float %b) nounwind { ; RV32IF-LABEL: fcmp_one: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: fmv.w.x ft1, a1 -; RV32IF-NEXT: feq.s a0, ft1, ft1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: feq.s a1, ft0, ft1 -; RV32IF-NEXT: not a1, a1 -; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: fmv.w.x ft0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: flt.s a1, ft0, ft1 +; RV32IF-NEXT: or a0, a1, a0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcmp_one: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fmv.w.x ft1, a1 -; RV64IF-NEXT: feq.s a0, ft1, ft1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: and a0, a1, a0 -; RV64IF-NEXT: feq.s a1, ft0, ft1 -; RV64IF-NEXT: not a1, a1 -; RV64IF-NEXT: and a0, a1, a0 +; RV64IF-NEXT: fmv.w.x ft0, a1 +; RV64IF-NEXT: fmv.w.x ft1, a0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: flt.s a1, ft0, ft1 +; RV64IF-NEXT: or a0, a1, a0 ; RV64IF-NEXT: ret %1 = fcmp one float %a, %b %2 = zext i1 %1 to i32 @@ -171,24 +165,20 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: fmv.w.x ft0, a1 ; RV32IF-NEXT: fmv.w.x ft1, a0 -; RV32IF-NEXT: feq.s a0, ft1, ft0 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: feq.s a2, ft1, ft1 -; RV32IF-NEXT: and a1, a2, a1 -; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: or a0, a0, a1 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: flt.s a1, ft0, ft1 +; RV32IF-NEXT: or a0, a1, a0 +; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcmp_ueq: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a1 ; RV64IF-NEXT: fmv.w.x ft1, a0 -; RV64IF-NEXT: feq.s a0, ft1, ft0 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: feq.s a2, ft1, ft1 -; 
RV64IF-NEXT: and a1, a2, a1 -; RV64IF-NEXT: xori a1, a1, 1 -; RV64IF-NEXT: or a0, a0, a1 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: flt.s a1, ft0, ft1 +; RV64IF-NEXT: or a0, a1, a0 +; RV64IF-NEXT: xori a0, a0, 1 ; RV64IF-NEXT: ret %1 = fcmp ueq float %a, %b %2 = zext i1 %1 to i32 diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll index 33d1d16d7bd439..73b7fd05dc1952 100644 --- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll @@ -165,17 +165,13 @@ define float @select_fcmp_ole(float %a, float %b) nounwind { } define float @select_fcmp_one(float %a, float %b) nounwind { -; TODO: feq.s+sltiu+bne sequence could be optimised ; RV32IF-LABEL: select_fcmp_one: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a0 ; RV32IF-NEXT: fmv.w.x ft1, a1 -; RV32IF-NEXT: feq.s a0, ft1, ft1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: feq.s a1, ft0, ft1 -; RV32IF-NEXT: not a1, a1 -; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: flt.s a0, ft0, ft1 +; RV32IF-NEXT: flt.s a1, ft1, ft0 +; RV32IF-NEXT: or a0, a1, a0 ; RV32IF-NEXT: bnez a0, .LBB6_2 ; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: fmv.s ft0, ft1 @@ -185,14 +181,11 @@ define float @select_fcmp_one(float %a, float %b) nounwind { ; ; RV64IF-LABEL: select_fcmp_one: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a0 ; RV64IF-NEXT: fmv.w.x ft1, a1 -; RV64IF-NEXT: feq.s a0, ft1, ft1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: and a0, a1, a0 -; RV64IF-NEXT: feq.s a1, ft0, ft1 -; RV64IF-NEXT: not a1, a1 -; RV64IF-NEXT: and a0, a1, a0 +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: flt.s a0, ft0, ft1 +; RV64IF-NEXT: flt.s a1, ft1, ft0 +; RV64IF-NEXT: or a0, a1, a0 ; RV64IF-NEXT: bnez a0, .LBB6_2 ; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fmv.s ft0, ft1 @@ -242,12 +235,10 @@ define float @select_fcmp_ueq(float %a, float %b) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: 
fmv.w.x ft1, a1 ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: feq.s a0, ft0, ft1 -; RV32IF-NEXT: feq.s a1, ft1, ft1 -; RV32IF-NEXT: feq.s a2, ft0, ft0 -; RV32IF-NEXT: and a1, a2, a1 -; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: or a0, a0, a1 +; RV32IF-NEXT: flt.s a0, ft0, ft1 +; RV32IF-NEXT: flt.s a1, ft1, ft0 +; RV32IF-NEXT: or a0, a1, a0 +; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: bnez a0, .LBB8_2 ; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: fmv.s ft0, ft1 @@ -259,12 +250,10 @@ define float @select_fcmp_ueq(float %a, float %b) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft1, a1 ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: feq.s a0, ft0, ft1 -; RV64IF-NEXT: feq.s a1, ft1, ft1 -; RV64IF-NEXT: feq.s a2, ft0, ft0 -; RV64IF-NEXT: and a1, a2, a1 -; RV64IF-NEXT: xori a1, a1, 1 -; RV64IF-NEXT: or a0, a0, a1 +; RV64IF-NEXT: flt.s a0, ft0, ft1 +; RV64IF-NEXT: flt.s a1, ft1, ft0 +; RV64IF-NEXT: or a0, a1, a0 +; RV64IF-NEXT: xori a0, a0, 1 ; RV64IF-NEXT: bnez a0, .LBB8_2 ; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fmv.s ft0, ft1 diff --git a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll index f0c5ad4288d6ee..a79476f0b1ac93 100644 --- a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll @@ -258,18 +258,14 @@ if.then: unreachable } -; TODO: feq.h+sltiu+bne -> feq.h+beq define void @br_fcmp_one(half %a, half %b) nounwind { ; RV32IZFH-LABEL: br_fcmp_one: ; RV32IZFH: # %bb.0: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: feq.h a0, fa1, fa1 -; RV32IZFH-NEXT: feq.h a1, fa0, fa0 -; RV32IZFH-NEXT: and a0, a1, a0 -; RV32IZFH-NEXT: feq.h a1, fa0, fa1 -; RV32IZFH-NEXT: not a1, a1 -; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 ; RV32IZFH-NEXT: bnez a0, .LBB7_2 ; RV32IZFH-NEXT: # %bb.1: # %if.else ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -282,12 
+278,9 @@ define void @br_fcmp_one(half %a, half %b) nounwind { ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: feq.h a0, fa1, fa1 -; RV64IZFH-NEXT: feq.h a1, fa0, fa0 -; RV64IZFH-NEXT: and a0, a1, a0 -; RV64IZFH-NEXT: feq.h a1, fa0, fa1 -; RV64IZFH-NEXT: not a1, a1 -; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 ; RV64IZFH-NEXT: bnez a0, .LBB7_2 ; RV64IZFH-NEXT: # %bb.1: # %if.else ; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -348,13 +341,11 @@ define void @br_fcmp_ueq(half %a, half %b) nounwind { ; RV32IZFH: # %bb.0: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: feq.h a0, fa0, fa1 -; RV32IZFH-NEXT: feq.h a1, fa1, fa1 -; RV32IZFH-NEXT: feq.h a2, fa0, fa0 -; RV32IZFH-NEXT: and a1, a2, a1 -; RV32IZFH-NEXT: xori a1, a1, 1 -; RV32IZFH-NEXT: or a0, a0, a1 -; RV32IZFH-NEXT: bnez a0, .LBB9_2 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 +; RV32IZFH-NEXT: addi a1, zero, 1 +; RV32IZFH-NEXT: bne a0, a1, .LBB9_2 ; RV32IZFH-NEXT: # %bb.1: # %if.else ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -366,13 +357,11 @@ define void @br_fcmp_ueq(half %a, half %b) nounwind { ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: feq.h a0, fa0, fa1 -; RV64IZFH-NEXT: feq.h a1, fa1, fa1 -; RV64IZFH-NEXT: feq.h a2, fa0, fa0 -; RV64IZFH-NEXT: and a1, a2, a1 -; RV64IZFH-NEXT: xori a1, a1, 1 -; RV64IZFH-NEXT: or a0, a0, a1 -; RV64IZFH-NEXT: bnez a0, .LBB9_2 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 +; RV64IZFH-NEXT: addi a1, zero, 1 +; RV64IZFH-NEXT: bne a0, a1, .LBB9_2 ; RV64IZFH-NEXT: # %bb.1: # %if.else ; RV64IZFH-NEXT: 
ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFH-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/half-fcmp.ll b/llvm/test/CodeGen/RISCV/half-fcmp.ll index 4d5704730eaae9..a8a98fae4d6088 100644 --- a/llvm/test/CodeGen/RISCV/half-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-fcmp.ll @@ -97,22 +97,16 @@ define i32 @fcmp_ole(half %a, half %b) nounwind { define i32 @fcmp_one(half %a, half %b) nounwind { ; RV32IZFH-LABEL: fcmp_one: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: feq.h a0, fa1, fa1 -; RV32IZFH-NEXT: feq.h a1, fa0, fa0 -; RV32IZFH-NEXT: and a0, a1, a0 -; RV32IZFH-NEXT: feq.h a1, fa0, fa1 -; RV32IZFH-NEXT: not a1, a1 -; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcmp_one: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa1, fa1 -; RV64IZFH-NEXT: feq.h a1, fa0, fa0 -; RV64IZFH-NEXT: and a0, a1, a0 -; RV64IZFH-NEXT: feq.h a1, fa0, fa1 -; RV64IZFH-NEXT: not a1, a1 -; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 ; RV64IZFH-NEXT: ret %1 = fcmp one half %a, %b %2 = zext i1 %1 to i32 @@ -141,22 +135,18 @@ define i32 @fcmp_ord(half %a, half %b) nounwind { define i32 @fcmp_ueq(half %a, half %b) nounwind { ; RV32IZFH-LABEL: fcmp_ueq: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: feq.h a0, fa0, fa1 -; RV32IZFH-NEXT: feq.h a1, fa1, fa1 -; RV32IZFH-NEXT: feq.h a2, fa0, fa0 -; RV32IZFH-NEXT: and a1, a2, a1 -; RV32IZFH-NEXT: xori a1, a1, 1 -; RV32IZFH-NEXT: or a0, a0, a1 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 +; RV32IZFH-NEXT: xori a0, a0, 1 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcmp_ueq: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa1 -; RV64IZFH-NEXT: feq.h a1, fa1, fa1 -; RV64IZFH-NEXT: feq.h a2, fa0, fa0 -; RV64IZFH-NEXT: and a1, a2, a1 -; RV64IZFH-NEXT: xori a1, a1, 1 -; 
RV64IZFH-NEXT: or a0, a0, a1 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 +; RV64IZFH-NEXT: xori a0, a0, 1 ; RV64IZFH-NEXT: ret %1 = fcmp ueq half %a, %b %2 = zext i1 %1 to i32 diff --git a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll index 1ef85f15eb2ed9..56a319fc156cb5 100644 --- a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll @@ -135,15 +135,11 @@ define half @select_fcmp_ole(half %a, half %b) nounwind { } define half @select_fcmp_one(half %a, half %b) nounwind { -; TODO: feq.h+sltiu+bne sequence could be optimised ; RV32IZFH-LABEL: select_fcmp_one: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: feq.h a0, fa1, fa1 -; RV32IZFH-NEXT: feq.h a1, fa0, fa0 -; RV32IZFH-NEXT: and a0, a1, a0 -; RV32IZFH-NEXT: feq.h a1, fa0, fa1 -; RV32IZFH-NEXT: not a1, a1 -; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 ; RV32IZFH-NEXT: bnez a0, .LBB6_2 ; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: fmv.h fa0, fa1 @@ -152,12 +148,9 @@ define half @select_fcmp_one(half %a, half %b) nounwind { ; ; RV64IZFH-LABEL: select_fcmp_one: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa1, fa1 -; RV64IZFH-NEXT: feq.h a1, fa0, fa0 -; RV64IZFH-NEXT: and a0, a1, a0 -; RV64IZFH-NEXT: feq.h a1, fa0, fa1 -; RV64IZFH-NEXT: not a1, a1 -; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 ; RV64IZFH-NEXT: bnez a0, .LBB6_2 ; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fmv.h fa0, fa1 @@ -198,12 +191,10 @@ define half @select_fcmp_ord(half %a, half %b) nounwind { define half @select_fcmp_ueq(half %a, half %b) nounwind { ; RV32IZFH-LABEL: select_fcmp_ueq: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: feq.h a0, fa0, fa1 -; RV32IZFH-NEXT: feq.h a1, fa1, fa1 -; RV32IZFH-NEXT: feq.h a2, fa0, fa0 -; 
RV32IZFH-NEXT: and a1, a2, a1 -; RV32IZFH-NEXT: xori a1, a1, 1 -; RV32IZFH-NEXT: or a0, a0, a1 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 +; RV32IZFH-NEXT: xori a0, a0, 1 ; RV32IZFH-NEXT: bnez a0, .LBB8_2 ; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: fmv.h fa0, fa1 @@ -212,12 +203,10 @@ define half @select_fcmp_ueq(half %a, half %b) nounwind { ; ; RV64IZFH-LABEL: select_fcmp_ueq: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa1 -; RV64IZFH-NEXT: feq.h a1, fa1, fa1 -; RV64IZFH-NEXT: feq.h a2, fa0, fa0 -; RV64IZFH-NEXT: and a1, a2, a1 -; RV64IZFH-NEXT: xori a1, a1, 1 -; RV64IZFH-NEXT: or a0, a0, a1 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 +; RV64IZFH-NEXT: xori a0, a0, 1 ; RV64IZFH-NEXT: bnez a0, .LBB8_2 ; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fmv.h fa0, fa1 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll index 3fefbb727413b1..47e346112ce6af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll @@ -304,12 +304,10 @@ define @fcmp_one_vv_nxv8f16( %va, %va, %vb ret %vc @@ -320,13 +318,10 @@ define @fcmp_one_vf_nxv8f16( %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v28, v26, fa0 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v28 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -339,13 +334,10 @@ define @fcmp_one_fv_nxv8f16( %va, half %b) ; CHECK: # %bb.0: ; 
CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v28, v26, fa0 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v28, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -457,12 +449,10 @@ define @fcmp_ueq_vv_nxv8f16( %va, %va, %vb ret %vc @@ -473,13 +463,10 @@ define @fcmp_ueq_vf_nxv8f16( %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v28, v26, fa0 -; CHECK-NEXT: vmfne.vv v26, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v28 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -492,13 +479,10 @@ define @fcmp_ueq_fv_nxv8f16( %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v28, v26, fa0 -; CHECK-NEXT: vmfne.vv v26, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v28, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, 
undef, zeroinitializer @@ -1221,12 +1205,10 @@ define @fcmp_one_vv_nxv8f32( %va, %va, %vb ret %vc @@ -1236,13 +1218,10 @@ define @fcmp_one_vf_nxv8f32( %va, float %b ; CHECK-LABEL: fcmp_one_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v28, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1254,13 +1233,10 @@ define @fcmp_one_fv_nxv8f32( %va, float %b ; CHECK-LABEL: fcmp_one_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v28, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1368,12 +1344,10 @@ define @fcmp_ueq_vv_nxv8f32( %va, %va, %vb ret %vc @@ -1383,13 +1357,10 @@ define @fcmp_ueq_vf_nxv8f32( %va, float %b ; CHECK-LABEL: fcmp_ueq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v28, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 
+; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1401,13 +1372,10 @@ define @fcmp_ueq_fv_nxv8f32( %va, float %b ; CHECK-LABEL: fcmp_ueq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v28, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2122,12 +2090,10 @@ define @fcmp_one_vv_nxv8f64( %va, %va, %vb ret %vc @@ -2137,13 +2103,10 @@ define @fcmp_one_vf_nxv8f64( %va, double ; CHECK-LABEL: fcmp_one_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v8, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2155,13 +2118,10 @@ define @fcmp_one_fv_nxv8f64( %va, double ; CHECK-LABEL: fcmp_one_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v8, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm 
v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2273,12 +2233,10 @@ define @fcmp_ueq_vv_nxv8f64( %va, %va, %vb ret %vc @@ -2288,13 +2246,10 @@ define @fcmp_ueq_vf_nxv8f64( %va, double ; CHECK-LABEL: fcmp_ueq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v8, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2306,13 +2261,10 @@ define @fcmp_ueq_fv_nxv8f64( %va, double ; CHECK-LABEL: fcmp_ueq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v8, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll index 0129169fa93de0..28a1f946aa49f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll @@ -304,12 +304,10 @@ define @fcmp_one_vv_nxv8f16( %va, %va, %vb ret %vc @@ -320,13 +318,10 @@ define @fcmp_one_vf_nxv8f16( %va, half %b) ; CHECK: # %bb.0: ; 
CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v28, v26, fa0 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v28 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -339,13 +334,10 @@ define @fcmp_one_fv_nxv8f16( %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v28, v26, fa0 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v28, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -457,12 +449,10 @@ define @fcmp_ueq_vv_nxv8f16( %va, %va, %vb ret %vc @@ -473,13 +463,10 @@ define @fcmp_ueq_vf_nxv8f16( %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v28, v26, fa0 -; CHECK-NEXT: vmfne.vv v26, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v28 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector 
%head, undef, zeroinitializer @@ -492,13 +479,10 @@ define @fcmp_ueq_fv_nxv8f16( %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v28, v26, fa0 -; CHECK-NEXT: vmfne.vv v26, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v28, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1221,12 +1205,10 @@ define @fcmp_one_vv_nxv8f32( %va, %va, %vb ret %vc @@ -1236,13 +1218,10 @@ define @fcmp_one_vf_nxv8f32( %va, float %b ; CHECK-LABEL: fcmp_one_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v28, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1254,13 +1233,10 @@ define @fcmp_one_fv_nxv8f32( %va, float %b ; CHECK-LABEL: fcmp_one_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v28, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; 
CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1368,12 +1344,10 @@ define @fcmp_ueq_vv_nxv8f32( %va, %va, %vb ret %vc @@ -1383,13 +1357,10 @@ define @fcmp_ueq_vf_nxv8f32( %va, float %b ; CHECK-LABEL: fcmp_ueq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v28, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1401,13 +1372,10 @@ define @fcmp_ueq_fv_nxv8f32( %va, float %b ; CHECK-LABEL: fcmp_ueq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v28, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2122,12 +2090,10 @@ define @fcmp_one_vv_nxv8f64( %va, %va, %vb ret %vc @@ -2137,13 +2103,10 @@ define @fcmp_one_vf_nxv8f64( %va, double ; CHECK-LABEL: fcmp_one_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v8, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, 
e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2155,13 +2118,10 @@ define @fcmp_one_fv_nxv8f64( %va, double ; CHECK-LABEL: fcmp_one_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v8, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2273,12 +2233,10 @@ define @fcmp_ueq_vv_nxv8f64( %va, %va, %vb ret %vc @@ -2288,13 +2246,10 @@ define @fcmp_ueq_vf_nxv8f64( %va, double ; CHECK-LABEL: fcmp_ueq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v8, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2306,13 +2261,10 @@ define @fcmp_ueq_fv_nxv8f64( %va, double ; CHECK-LABEL: fcmp_ueq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v8, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf 
v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll b/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll index e4753c4e426092..60b6381b16b69a 100644 --- a/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll +++ b/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll @@ -102,16 +102,14 @@ define i32 @oge_f32(float %x, float %y) { ; CHECK-NEXT: .functype ueq_f32 (f32, f32) -> (i32){{$}} ; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} ; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.eq $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.ne $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}} -; CHECK-NEXT: return $pop[[NUM4]]{{$}} +; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} +; CHECK-NEXT: f32.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} +; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} +; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 +; CHECK-NEXT: i32.xor $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $pop[[C0]]{{$}} +; CHECK-NEXT: return $pop[[NUM3]]{{$}} define i32 @ueq_f32(float %x, float %y) { %a = fcmp ueq float %x, %y %b = zext i1 %a to i32 @@ -122,15 +120,11 @@ define i32 @ueq_f32(float %x, float %y) { ; 
CHECK-NEXT: .functype one_f32 (f32, f32) -> (i32){{$}} ; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} ; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.ne $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f32.eq $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.eq $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; CHECK-NEXT: i32.and $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}} -; CHECK-NEXT: i32.and $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}} +; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} +; CHECK-NEXT: f32.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} +; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} ; CHECK-NEXT: return $pop[[NUM4]] define i32 @one_f32(float %x, float %y) { %a = fcmp one float %x, %y diff --git a/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll b/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll index 37150e8cded70f..063a293b7fa8e3 100644 --- a/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll +++ b/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll @@ -101,16 +101,14 @@ define i32 @oge_f64(double %x, double %y) { ; CHECK-NEXT: .functype ueq_f64 (f64, f64) -> (i32){{$}} ; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} ; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.eq $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}} -; 
CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.ne $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}} -; CHECK-NEXT: return $pop[[NUM4]]{{$}} +; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} +; CHECK-NEXT: f64.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} +; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} +; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 +; CHECK-NEXT: i32.xor $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $pop[[C0]]{{$}} +; CHECK-NEXT: return $pop[[NUM3]]{{$}} define i32 @ueq_f64(double %x, double %y) { %a = fcmp ueq double %x, %y %b = zext i1 %a to i32 @@ -121,15 +119,11 @@ define i32 @ueq_f64(double %x, double %y) { ; CHECK-NEXT: .functype one_f64 (f64, f64) -> (i32){{$}} ; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} ; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.ne $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f64.eq $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.eq $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; CHECK-NEXT: i32.and $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}} -; CHECK-NEXT: i32.and $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}} +; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} +; CHECK-NEXT: f64.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} +; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} ; CHECK-NEXT: return $pop[[NUM4]] define i32 @one_f64(double %x, double %y) { %a = fcmp one double %x, %y diff --git 
a/llvm/test/CodeGen/WebAssembly/simd-comparisons.ll b/llvm/test/CodeGen/WebAssembly/simd-comparisons.ll index 5fc91658efed2e..a77f9e1fa5818c 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-comparisons.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-comparisons.ll @@ -1000,11 +1000,9 @@ define <4 x i32> @compare_sext_ole_nnan_v4f32 (<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: compare_one_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype compare_one_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.ne $push[[T0:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: f32x4.eq $push[[T1:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f32x4.eq $push[[T2:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}} +; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i1> @compare_one_v4f32 (<4 x float> %x, <4 x float> %y) { %res = fcmp one <4 x float> %x, %y @@ -1024,11 +1022,9 @@ define <4 x i1> @compare_one_nnan_v4f32 (<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: compare_sext_one_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype compare_sext_one_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.ne $push[[T0:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: f32x4.eq $push[[T1:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f32x4.eq $push[[T2:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}} +; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @compare_sext_one_v4f32 (<4 x 
float> %x, <4 x float> %y) { %cmp = fcmp one <4 x float> %x, %y @@ -1100,11 +1096,10 @@ define <4 x i32> @compare_sext_ord_nnan_v4f32 (<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: compare_ueq_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype compare_ueq_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.eq $push[[T0:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: f32x4.ne $push[[T1:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f32x4.ne $push[[T2:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}} +; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} +; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i1> @compare_ueq_v4f32 (<4 x float> %x, <4 x float> %y) { %res = fcmp ueq <4 x float> %x, %y @@ -1124,11 +1119,10 @@ define <4 x i1> @compare_ueq_nnan_v4f32 (<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: compare_sext_ueq_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype compare_sext_ueq_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.eq $push[[T0:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: f32x4.ne $push[[T1:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f32x4.ne $push[[T2:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}} +; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} +; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}} ; SIMD128-NEXT: return $pop[[R]] define <4 x i32> @compare_sext_ueq_v4f32 (<4 x float> %x, <4 x float> %y) { %cmp = fcmp ueq <4 x float> %x, %y @@ -1628,11 
+1622,9 @@ define <2 x i64> @compare_sext_ole_nnan_v2f64 (<2 x double> %x, <2 x double> %y) ; CHECK-LABEL: compare_one_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype compare_one_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.ne $push[[T0:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: f64x2.eq $push[[T1:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f64x2.eq $push[[T2:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}} +; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i1> @compare_one_v2f64 (<2 x double> %x, <2 x double> %y) { %res = fcmp one <2 x double> %x, %y @@ -1652,11 +1644,9 @@ define <2 x i1> @compare_one_nnan_v2f64 (<2 x double> %x, <2 x double> %y) { ; CHECK-LABEL: compare_sext_one_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype compare_sext_one_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.ne $push[[T0:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: f64x2.eq $push[[T1:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f64x2.eq $push[[T2:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}} +; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @compare_sext_one_v2f64 (<2 x double> %x, <2 x double> %y) { %cmp = fcmp one <2 x double> %x, %y @@ -1728,11 +1718,10 @@ define <2 x i64> @compare_sext_ord_nnan_v2f64 (<2 x double> %x, <2 x double> %y) ; CHECK-LABEL: compare_ueq_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype 
compare_ueq_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.eq $push[[T0:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: f64x2.ne $push[[T1:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f64x2.ne $push[[T2:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}} +; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} +; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i1> @compare_ueq_v2f64 (<2 x double> %x, <2 x double> %y) { %res = fcmp ueq <2 x double> %x, %y @@ -1752,11 +1741,10 @@ define <2 x i1> @compare_ueq_nnan_v2f64 (<2 x double> %x, <2 x double> %y) { ; CHECK-LABEL: compare_sext_ueq_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype compare_sext_ueq_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.eq $push[[T0:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: f64x2.ne $push[[T1:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f64x2.ne $push[[T2:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}} +; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} +; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @compare_sext_ueq_v2f64 (<2 x double> %x, <2 x double> %y) { %cmp = fcmp ueq <2 x double> %x, %y From 4718ec01669b01373180f4cd1256c6e2dd6f3999 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Sun, 10 Jan 2021 20:41:50 -0500 Subject: [PATCH 50/86] [clangd] Avoid recursion in TargetFinder::add() Fixes https://github.com/clangd/clangd/issues/633 
Differential Revision: https://reviews.llvm.org/D94382 --- clang-tools-extra/clangd/FindTarget.cpp | 10 +++++ clang-tools-extra/clangd/FindTarget.h | 3 ++ .../clangd/unittests/FindTargetTests.cpp | 41 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index 9a502a84e36ff4..84316659daaddf 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -330,6 +330,7 @@ struct TargetFinder { llvm::SmallDenseMap> Decls; + llvm::SmallDenseMap Seen; RelSet Flags; template void debug(T &Node, RelSet Flags) { @@ -359,6 +360,15 @@ struct TargetFinder { if (!D) return; debug(*D, Flags); + + // Avoid recursion (which can arise in the presence of heuristic + // resolution of dependent names) by exiting early if we have + // already seen this decl with all flags in Flags. + auto Res = Seen.try_emplace(D); + if (!Res.second && Res.first->second.contains(Flags)) + return; + Res.first->second |= Flags; + if (const UsingDirectiveDecl *UDD = llvm::dyn_cast(D)) D = UDD->getNominatedNamespaceAsWritten(); diff --git a/clang-tools-extra/clangd/FindTarget.h b/clang-tools-extra/clangd/FindTarget.h index 435e4f4ac038b1..92e4354d1eaadc 100644 --- a/clang-tools-extra/clangd/FindTarget.h +++ b/clang-tools-extra/clangd/FindTarget.h @@ -194,6 +194,9 @@ class DeclRelationSet { S &= Other.S; return *this; } + bool contains(DeclRelationSet Other) const { + return (S & Other.S) == Other.S; + } friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, DeclRelationSet); }; // The above operators can't be looked up if both sides are enums. 
diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index dd7e9878a6d51f..46e17dc053c043 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -787,6 +787,47 @@ TEST_F(TargetDeclTest, DependentTypes) { "template struct B"); } +TEST_F(TargetDeclTest, TypedefCascade) { + Code = R"cpp( + struct C { + using type = int; + }; + struct B { + using type = C::type; + }; + struct A { + using type = B::type; + }; + A::[[type]] waldo; + )cpp"; + EXPECT_DECLS("TypedefTypeLoc", + {"using type = int", Rel::Alias | Rel::Underlying}, + {"using type = C::type", Rel::Alias | Rel::Underlying}, + {"using type = B::type", Rel::Alias}); +} + +TEST_F(TargetDeclTest, RecursiveTemplate) { + Flags.push_back("-std=c++20"); // the test case uses concepts + + Code = R"cpp( + template + concept Leaf = false; + + template + struct descend_left { + using type = typename descend_left::[[type]]; + }; + + template + struct descend_left { + using type = typename Tree::value; + }; + )cpp"; + EXPECT_DECLS("DependentNameTypeLoc", + {"using type = typename descend_left::type", + Rel::Alias | Rel::Underlying}); +} + TEST_F(TargetDeclTest, ObjC) { Flags = {"-xobjective-c"}; Code = R"cpp( From f748e92295515ea7b39cd687a718915b559de6ec Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 11 Jan 2021 13:50:52 -0800 Subject: [PATCH 51/86] [NewPM] Run non-trivial loop unswitching under -O2/3/s/z Fixes https://bugs.llvm.org/show_bug.cgi?id=48715. 
Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D94448 --- llvm/lib/Passes/PassBuilder.cpp | 2 +- llvm/test/Transforms/LoopUnroll/opt-levels.ll | 8 ++-- .../Transforms/SimpleLoopUnswitch/pipeline.ll | 39 +++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/pipeline.ll diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 527d19d6358923..0d7f442f9ff4e6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -724,7 +724,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz)); // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); - LPM1.addPass(SimpleLoopUnswitchPass()); + LPM1.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ true)); LPM2.addPass(LoopIdiomRecognizePass()); LPM2.addPass(IndVarSimplifyPass()); diff --git a/llvm/test/Transforms/LoopUnroll/opt-levels.ll b/llvm/test/Transforms/LoopUnroll/opt-levels.ll index ed0abc7672e08b..f268d9bd5506b7 100644 --- a/llvm/test/Transforms/LoopUnroll/opt-levels.ll +++ b/llvm/test/Transforms/LoopUnroll/opt-levels.ll @@ -7,10 +7,10 @@ ; the behavior, we artificially disable unrolling for anything but O3 by setting ; the default threshold to 0. 
-; O3: loop2.preheader -; O2-NOT: loop2.preheader -; Os-NOT: loop2.preheader -; Oz-NOT: loop2.preheader +; O3: loop1.preheader +; O2-NOT: loop1.preheader +; Os-NOT: loop1.preheader +; Oz-NOT: loop1.preheader define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind { entry: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pipeline.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pipeline.ll new file mode 100644 index 00000000000000..953b407cccd98f --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/pipeline.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -S -passes="default" | FileCheck %s -check-prefix=O1 +; RUN: opt < %s -S -passes="default" | FileCheck %s -check-prefix=O2 + +declare i32 @a() +declare i32 @b() +declare i32 @c() + +; O1-NOT: loop_begin.us: +; O2: loop_begin.us: + +define i32 @test1(i1* %ptr, i1 %cond1, i1 %cond2) { +entry: + br label %loop_begin + +loop_begin: + br i1 %cond1, label %loop_a, label %loop_b + +loop_a: + call i32 @a() + br label %latch + +loop_b: + br i1 %cond2, label %loop_b_a, label %loop_b_b + +loop_b_a: + call i32 @b() + br label %latch + +loop_b_b: + call i32 @c() + br label %latch + +latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret i32 0 +} From a14040bd4d902419b53cf0ad576caa0f01eccf5c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 12 Jan 2021 10:52:53 -0800 Subject: [PATCH 52/86] [RISCV] Use vmerge.vim for llvm.riscv.vfmerge with a 0.0 scalar operand. We can use a 0 immediate to avoid needing to materialize 0 into an FPR first. 
Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D94459 --- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 8 + llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll | 154 +++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll | 210 ++++++++++++++++++ 3 files changed, 372 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index bf5ee06bce350d..3604a25b0d6ada 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -3367,6 +3367,14 @@ defm "" : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE", defm "" : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE", /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; +foreach fvti = AllFloatVectors in { + defvar instr = !cast("PseudoVMERGE_VIM_"#fvti.LMul.MX); + def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2), + (fvti.Scalar (fpimm0)), + (fvti.Mask V0), (XLenVT GPR:$vl))), + (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), (NoX0 GPR:$vl), fvti.SEW)>; +} + //===----------------------------------------------------------------------===// // 14.16. 
Vector Floating-Point Move Instruction //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll index a6b09704c8a67c..5a7262c348c5c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll @@ -439,3 +439,157 @@ entry: ret %a } + +define @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv1f16.f16( + %0, + half zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv2f16.f16( + %0, + half zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv4f16.f16( + %0, + half zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv8f16.f16( + %0, + half zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv16f16.f16( + %0, + half zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv32f16.f16( + %0, + half zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv1f32.f32( + %0, + float zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv2f32.f32( + %0, + float zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv4f32.f32( + %0, + float zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv8f32.f32( + %0, + float zeroinitializer, + %1, + i32 %2) + + ret %a +} + 
+define @intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv16f32.f32( + %0, + float zeroinitializer, + %1, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll index 6f3cafce40e663..1a344e1cc0fa4d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll @@ -599,3 +599,213 @@ entry: ret %a } + +define @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv1f16.f16( + %0, + half zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv2f16.f16( + %0, + half zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv4f16.f16( + %0, + half zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call 
@llvm.riscv.vfmerge.nxv8f16.f16( + %0, + half zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv16f16.f16( + %0, + half zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv32f16.f16( + %0, + half zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv1f32.f32( + %0, + float zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv2f32.f32( + %0, + float zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv4f32.f32( + %0, + float zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv8f32.f32( + %0, + float zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv16f32.f32( + %0, + float zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv1f64.f64( + %0, + double zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv2f64.f64( + %0, + double zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv4f64.f64( + %0, + double zeroinitializer, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv8f64.f64( + %0, + 
double zeroinitializer, + %1, + i64 %2) + + ret %a +} From 08d4a50467ecef1337f8d7d9763c7738861bd6f6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 11 Jan 2021 16:33:22 +0000 Subject: [PATCH 53/86] [FunctionAttrs] Precommit tests for willreturn inference. Tests for D94502. --- .../Transforms/FunctionAttrs/willreturn.ll | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 llvm/test/Transforms/FunctionAttrs/willreturn.ll diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn.ll b/llvm/test/Transforms/FunctionAttrs/willreturn.ll new file mode 100644 index 00000000000000..56ca12638e9cc7 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/willreturn.ll @@ -0,0 +1,72 @@ +; RUN: opt -function-attrs -S %s | FileCheck %s + +; TODO +define void @mustprogress_readnone() mustprogress { +; CHECK-NOT: Function Attrs: {{.*}} willreturn +; CHECK: define void @mustprogress_readnone() +; +entry: + br label %while.body + +while.body: + br label %while.body +} + +; TODO +define i32 @mustprogress_load(i32* %ptr) mustprogress { +; CHECK-NOT: Function Attrs: {{.*}} willreturn +; CHECK: define i32 @mustprogress_load( +; +entry: + %r = load i32, i32* %ptr + ret i32 %r +} + +define void @mustprogress_store(i32* %ptr) mustprogress { +; CHECK-NOT: Function Attrs: {{.*}} willreturn +; CHECK: define void @mustprogress_store( +; +entry: + store i32 0, i32* %ptr + ret void +} + +declare void @unknown_fn() + +define void @mustprogress_call_unknown_fn() mustprogress { +; CHECK-NOT: Function Attrs: {{.*}} willreturn +; CHECK: define void @mustprogress_call_unknown_fn( +; + call void @unknown_fn() + ret void +} + +; TODO +define i32 @mustprogress_call_known_functions(i32* %ptr) mustprogress { +; CHECK-NOT: Function Attrs: {{.*}} willreturn +; CHECK: define i32 @mustprogress_call_known_functions( +; + call void @mustprogress_readnone() + %r = call i32 @mustprogress_load(i32* %ptr) + ret i32 %r +} + +declare i32 @__gxx_personality_v0(...) 
+ +; TODO +define i64 @mustprogress_mayunwind() mustprogress personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-NOT: Function Attrs: {{.*}} willreturn +; CHECK: define i64 @mustprogress_mayunwind( +; + %a = invoke i64 @fn_noread() + to label %A unwind label %B +A: + ret i64 10 + +B: + %val = landingpad { i8*, i32 } + catch i8* null + ret i64 0 +} + +declare i64 @fn_noread() readnone From eef4bdbb34de2dda657668c2ab39397e61e36a0a Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Fri, 18 Dec 2020 15:11:51 -0500 Subject: [PATCH 54/86] [libc++] Add a missing `<_Compare>` template argument. Sometimes `_Compare` is an lvalue reference type, so letting it be deduced is pretty much always wrong. (Well, less efficient than it could be, anyway.) Differential Revision: https://reviews.llvm.org/D93562 --- libcxx/include/algorithm | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 7a4cc39dbeab14..fe9caf475f5ac6 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -4483,7 +4483,7 @@ __buffered_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator value_type* __p = __buff; for (_BidirectionalIterator __i = __first; __i != __middle; __d.template __incr(), (void) ++__i, (void) ++__p) ::new ((void*)__p) value_type(_VSTD::move(*__i)); - _VSTD::__half_inplace_merge(__buff, __p, __middle, __last, __first, __comp); + _VSTD::__half_inplace_merge<_Compare>(__buff, __p, __middle, __last, __first, __comp); } else { @@ -4492,9 +4492,10 @@ __buffered_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator ::new ((void*)__p) value_type(_VSTD::move(*__i)); typedef reverse_iterator<_BidirectionalIterator> _RBi; typedef reverse_iterator _Rv; - _VSTD::__half_inplace_merge(_Rv(__p), _Rv(__buff), + typedef __invert<_Compare> _Inverted; + _VSTD::__half_inplace_merge<_Inverted>(_Rv(__p), _Rv(__buff), _RBi(__middle), _RBi(__first), - _RBi(__last), 
_VSTD::__invert<_Compare>(__comp)); + _RBi(__last), _Inverted(__comp)); } } From 79f99ba65d96a35a79911daf1b67559dd52a684d Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Tue, 12 Jan 2021 14:16:15 -0500 Subject: [PATCH 55/86] [libcxx] Port to OpenBSD Add initial OpenBSD support. Reviewed By: ldionne Differential Revision: https://reviews.llvm.org/D94205 --- libcxx/include/CMakeLists.txt | 1 + libcxx/include/__config | 15 ++++++++++----- libcxx/include/__locale | 2 ++ libcxx/include/support/openbsd/xlocale.h | 19 +++++++++++++++++++ 4 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 libcxx/include/support/openbsd/xlocale.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index cd12f60a049cc5..2ffdf07efcd4f2 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -159,6 +159,7 @@ set(files support/musl/xlocale.h support/newlib/xlocale.h support/nuttx/xlocale.h + support/openbsd/xlocale.h support/solaris/floatingpoint.h support/solaris/wchar.h support/solaris/xlocale.h diff --git a/libcxx/include/__config b/libcxx/include/__config index 4537d249cf4f86..f1606c6d3b1c9a 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -264,14 +264,14 @@ # endif // __LONG_LONG_SUPPORTED #endif // __FreeBSD__ -#ifdef __NetBSD__ +#if defined(__NetBSD__) || defined(__OpenBSD__) # include # if _BYTE_ORDER == _LITTLE_ENDIAN # define _LIBCPP_LITTLE_ENDIAN # else // _BYTE_ORDER == _LITTLE_ENDIAN # define _LIBCPP_BIG_ENDIAN # endif // _BYTE_ORDER == _LITTLE_ENDIAN -#endif // __NetBSD__ +#endif // defined(__NetBSD__) || defined(__OpenBSD__) #if defined(_WIN32) # define _LIBCPP_WIN32API @@ -312,7 +312,7 @@ # endif #endif // __sun__ -#if defined(__CloudABI__) +#if defined(__OpenBSD__) || defined(__CloudABI__) // Certain architectures provide arc4random(). 
Prefer using // arc4random() over /dev/{u,}random to make it possible to obtain // random data even when using sandboxing mechanisms such as chroots, @@ -370,6 +370,9 @@ # define _LIBCPP_HAS_ALIGNED_ALLOC # define _LIBCPP_HAS_QUICK_EXIT # define _LIBCPP_HAS_TIMESPEC_GET +# elif defined(__OpenBSD__) +# define _LIBCPP_HAS_ALIGNED_ALLOC +# define _LIBCPP_HAS_TIMESPEC_GET # elif defined(__linux__) # if !defined(_LIBCPP_HAS_MUSL_LIBC) # if _LIBCPP_GLIBC_PREREQ(2, 15) || defined(__BIONIC__) @@ -1109,6 +1112,7 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container( # if defined(__FreeBSD__) || \ defined(__wasi__) || \ defined(__NetBSD__) || \ + defined(__OpenBSD__) || \ defined(__NuttX__) || \ defined(__linux__) || \ defined(__GNU__) || \ @@ -1204,14 +1208,15 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container( // Some systems do not provide gets() in their C library, for security reasons. #ifndef _LIBCPP_C_HAS_NO_GETS # if defined(_LIBCPP_MSVCRT) || \ - (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) + (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) || \ + defined(__OpenBSD__) # define _LIBCPP_C_HAS_NO_GETS # endif #endif #if defined(__BIONIC__) || defined(__CloudABI__) || defined(__NuttX__) || \ defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) || \ - defined(__MVS__) + defined(__MVS__) || defined(__OpenBSD__) #define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE #endif diff --git a/libcxx/include/__locale b/libcxx/include/__locale index f32bd59ae58564..4e9e0c08acf0e6 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -33,6 +33,8 @@ # include #elif defined(_NEWLIB_VERSION) # include +#elif defined(__OpenBSD__) +# include #elif (defined(__APPLE__) || defined(__FreeBSD__) \ || defined(__EMSCRIPTEN__) || defined(__IBMCPP__)) # include diff --git a/libcxx/include/support/openbsd/xlocale.h b/libcxx/include/support/openbsd/xlocale.h new file mode 100644 index 
00000000000000..fbfaedd127c6c8 --- /dev/null +++ b/libcxx/include/support/openbsd/xlocale.h @@ -0,0 +1,19 @@ +// -*- C++ -*- +//===-------------------- support/openbsd/xlocale.h -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_SUPPORT_OPENBSD_XLOCALE_H +#define _LIBCPP_SUPPORT_OPENBSD_XLOCALE_H + +#include +#include +#include +#include +#include + +#endif From 7ecad2e4ced180b4fdebc6b7bf6d26d83b454318 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 24 Dec 2020 17:04:40 +0100 Subject: [PATCH 56/86] [InstSimplify] Don't fold gep p, -p to null This is a partial fix for https://bugs.llvm.org/show_bug.cgi?id=44403. Folding gep p, q-p to q is only legal if p and q have the same provenance. This fold should probably be guarded by something like getUnderlyingObject(p) == getUnderlyingObject(q). This patch is a partial fix that removes the special handling for gep p, 0-p, which will fold to a null pointer, which would certainly not pass an underlying object check (unless p is also null, in which case this would fold trivially anyway). Folding to a null pointer is particularly problematic due to the special handling it receives in many places, making end-to-end miscompiles more likely. 
Differential Revision: https://reviews.llvm.org/D93820 --- llvm/lib/Analysis/InstructionSimplify.cpp | 24 ++++++++++----- llvm/test/Transforms/InstSimplify/gep.ll | 37 +++++++++++++++++++---- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 96a3ada89db4ac..2ae4228495e349 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4270,9 +4270,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, // doesn't truncate the pointers. if (Ops[1]->getType()->getScalarSizeInBits() == Q.DL.getPointerSizeInBits(AS)) { - auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { - if (match(P, m_Zero())) - return Constant::getNullValue(GEPTy); + auto PtrToInt = [GEPTy](Value *P) -> Value * { Value *Temp; if (match(P, m_PtrToInt(m_Value(Temp)))) if (Temp->getType() == GEPTy) @@ -4280,10 +4278,14 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, return nullptr; }; + // FIXME: The following transforms are only legal if P and V have the + // same provenance (PR44403). Check whether getUnderlyingObject() is + // the same? + // getelementptr V, (sub P, V) -> P if P points to a type of size 1. 
if (TyAllocSize == 1 && match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))))) - if (Value *R = PtrToIntOrZero(P)) + if (Value *R = PtrToInt(P)) return R; // getelementptr V, (ashr (sub P, V), C) -> Q @@ -4292,7 +4294,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), m_ConstantInt(C))) && TyAllocSize == 1ULL << C) - if (Value *R = PtrToIntOrZero(P)) + if (Value *R = PtrToInt(P)) return R; // getelementptr V, (sdiv (sub P, V), C) -> Q @@ -4300,7 +4302,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, if (match(Ops[1], m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), m_SpecificInt(TyAllocSize)))) - if (Value *R = PtrToIntOrZero(P)) + if (Value *R = PtrToInt(P)) return R; } } @@ -4317,15 +4319,21 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, BasePtrOffset); + // Avoid creating inttoptr of zero here: While LLVMs treatment of + // inttoptr is generally conservative, this particular case is folded to + // a null pointer, which will have incorrect provenance. 
+ // gep (gep V, C), (sub 0, V) -> C if (match(Ops.back(), - m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr))))) { + m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr)))) && + !BasePtrOffset.isNullValue()) { auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset); return ConstantExpr::getIntToPtr(CI, GEPTy); } // gep (gep V, C), (xor V, -1) -> C-1 if (match(Ops.back(), - m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes()))) { + m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes())) && + !BasePtrOffset.isOneValue()) { auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1); return ConstantExpr::getIntToPtr(CI, GEPTy); } diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll index e6670e4a9345b9..8fff9e99d34b59 100644 --- a/llvm/test/Transforms/InstSimplify/gep.ll +++ b/llvm/test/Transforms/InstSimplify/gep.ll @@ -40,9 +40,16 @@ define i64* @test3(i64* %b, i64* %e) { ret i64* %gep } +; The following tests should not be folded to null, because this would +; lose provenance of the base pointer %b. 
+ define %struct.A* @test4(%struct.A* %b) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: ret %struct.A* null +; CHECK-NEXT: [[B_PTR:%.*]] = ptrtoint %struct.A* [[B:%.*]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[B_PTR]] +; CHECK-NEXT: [[SDIV:%.*]] = sdiv exact i64 [[SUB]], 7 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.A* [[B]], i64 [[SDIV]] +; CHECK-NEXT: ret %struct.A* [[GEP]] ; %b_ptr = ptrtoint %struct.A* %b to i64 %sub = sub i64 0, %b_ptr @@ -53,7 +60,11 @@ define %struct.A* @test4(%struct.A* %b) { define %struct.A* @test4_inbounds(%struct.A* %b) { ; CHECK-LABEL: @test4_inbounds( -; CHECK-NEXT: ret %struct.A* null +; CHECK-NEXT: [[B_PTR:%.*]] = ptrtoint %struct.A* [[B:%.*]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[B_PTR]] +; CHECK-NEXT: [[SDIV:%.*]] = sdiv exact i64 [[SUB]], 7 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], %struct.A* [[B]], i64 [[SDIV]] +; CHECK-NEXT: ret %struct.A* [[GEP]] ; %b_ptr = ptrtoint %struct.A* %b to i64 %sub = sub i64 0, %b_ptr @@ -64,7 +75,10 @@ define %struct.A* @test4_inbounds(%struct.A* %b) { define i8* @test5(i8* %b) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: [[B_PTR:%.*]] = ptrtoint i8* [[B:%.*]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[B_PTR]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[B]], i64 [[SUB]] +; CHECK-NEXT: ret i8* [[GEP]] ; %b_ptr = ptrtoint i8* %b to i64 %sub = sub i64 0, %b_ptr @@ -74,7 +88,10 @@ define i8* @test5(i8* %b) { define i8* @test5_inbounds(i8* %b) { ; CHECK-LABEL: @test5_inbounds( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: [[B_PTR:%.*]] = ptrtoint i8* [[B:%.*]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[B_PTR]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[B]], i64 [[SUB]] +; CHECK-NEXT: ret i8* [[GEP]] ; %b_ptr = ptrtoint i8* %b to i64 %sub = sub i64 0, %b_ptr @@ -84,7 +101,11 @@ define i8* @test5_inbounds(i8* %b) { define i64* @test6(i64* %b) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: 
ret i64* null +; CHECK-NEXT: [[B_PTR:%.*]] = ptrtoint i64* [[B:%.*]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[B_PTR]] +; CHECK-NEXT: [[ASHR:%.*]] = ashr exact i64 [[SUB]], 3 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[ASHR]] +; CHECK-NEXT: ret i64* [[GEP]] ; %b_ptr = ptrtoint i64* %b to i64 %sub = sub i64 0, %b_ptr @@ -95,7 +116,11 @@ define i64* @test6(i64* %b) { define i64* @test6_inbounds(i64* %b) { ; CHECK-LABEL: @test6_inbounds( -; CHECK-NEXT: ret i64* null +; CHECK-NEXT: [[B_PTR:%.*]] = ptrtoint i64* [[B:%.*]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[B_PTR]] +; CHECK-NEXT: [[ASHR:%.*]] = ashr exact i64 [[SUB]], 3 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[ASHR]] +; CHECK-NEXT: ret i64* [[GEP]] ; %b_ptr = ptrtoint i64* %b to i64 %sub = sub i64 0, %b_ptr From bdd1ad5e5c57ae0f0bf899517c540ad8a679f01a Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 12 Jan 2021 14:32:27 -0500 Subject: [PATCH 57/86] [OpenMP] Fixed include directories for OpenMP when building OpenMP with LLVM_ENABLE_RUNTIMES Some LLVM headers are generated by CMake. Before the installation, LLVM's headers are distributed everywhere, some of which are in `${LLVM_SRC_ROOT}/llvm/include/llvm`, and some are in `${LLVM_BINARY_ROOT}/include/llvm`. After intallation, they're all in `${LLVM_INSTALLATION_ROOT}/include/llvm`. OpenMP now depends on LLVM headers. Some headers depend on headers generated by CMake. When building OpenMP along with LLVM, a.k.a via `LLVM_ENABLE_RUNTIMES`, we need to tell OpenMP where it can find those headers, especially those still have not been copied/installed. 
Reviewed By: jdoerfert, jhuber6 Differential Revision: https://reviews.llvm.org/D94534 --- openmp/CMakeLists.txt | 12 +++++++----- openmp/libomptarget/CMakeLists.txt | 4 ++-- openmp/libomptarget/plugins/amdgpu/CMakeLists.txt | 6 +++--- openmp/libomptarget/src/CMakeLists.txt | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index dc0d3a6e718acf..12e8d542f9f6f8 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -39,6 +39,8 @@ else() set(OPENMP_TEST_C_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang.exe) set(OPENMP_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++.exe) endif() + + list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LLVM_MAIN_INCLUDE_DIR} ${LLVM_BINARY_DIR}/include) endif() # Check and set up common compiler flags. @@ -67,16 +69,16 @@ if (APPLE OR WIN32 OR NOT OPENMP_HAVE_STD_CPP14_FLAG) endif() # Attempt to locate LLVM source, required by libomptarget -if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR) +if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS) if (LLVM_MAIN_INCLUDE_DIR) - set(LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_INCLUDE_DIR}) + list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LLVM_MAIN_INCLUDE_DIR}) elseif (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include) - set(LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include) + list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include) endif() endif() -if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR) - message(STATUS "Missing definition for LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR, disabling libomptarget") +if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS) + message(STATUS "Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS, disabling libomptarget") set(ENABLE_LIBOMPTARGET OFF) endif() diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt index 06db7b4c35e27f..6c90ced107eb79 100644 --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ 
-31,8 +31,8 @@ include(LibomptargetUtils) include(LibomptargetGetDependencies) # LLVM source tree is required at build time for libomptarget -if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR) - message(FATAL_ERROR "Missing definition for LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR") +if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS) + message(FATAL_ERROR "Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS") endif() # This is a list of all the targets that are supported/tested right now. diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt index 2d58388c80bb53..43934b52e42bed 100644 --- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ -30,8 +30,8 @@ if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_ return() endif() -if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR) - libomptarget_say("Not building AMDGPU plugin: Missing definition for LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR") +if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS) + libomptarget_say("Not building AMDGPU plugin: Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS") return() endif() @@ -50,7 +50,7 @@ endif() include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/impl - ${LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR} + ${LIBOMPTARGET_LLVM_INCLUDE_DIRS} ) add_library(omptarget.rtl.amdgpu SHARED diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt index 4088f59042fce0..38eaf455f95b52 100644 --- a/openmp/libomptarget/src/CMakeLists.txt +++ b/openmp/libomptarget/src/CMakeLists.txt @@ -20,7 +20,7 @@ set(LIBOMPTARGET_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp ) -include_directories(${LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR}) +include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS}) # Build libomptarget library with libdl dependency. Add LLVMSupport # dependency if building in-tree with profiling enabled. 
From 33e2494bea653a845cb0502cc6d3cecdf2b47750 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Tue, 12 Jan 2021 19:40:02 +0000 Subject: [PATCH 58/86] [libomptarget][amdgpu][nfc] Fix build on centos [libomptarget][amdgpu][nfc] Fix build on centos rtl.cpp replaced 224 with a #define from elf.h, but that doesn't work on a centos 7 build machine with an old elf.h Reviewed By: ronlieb Differential Revision: https://reviews.llvm.org/D94528 --- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 437846f8b15b84..bd450f9898faf7 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -638,7 +638,7 @@ void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) { } bool elf_machine_id_is_amdgcn(__tgt_device_image *image) { - const uint16_t amdgcnMachineID = EM_AMDGPU; + const uint16_t amdgcnMachineID = 224; // EM_AMDGPU may not be in system elf.h int32_t r = elf_check_machine(image, amdgcnMachineID); if (!r) { DP("Supported machine ID not found\n"); From e5f51fdd650c6d20c81fedb8e856e9858aa10991 Mon Sep 17 00:00:00 2001 From: David Truby Date: Thu, 3 Dec 2020 11:25:57 +0000 Subject: [PATCH 59/86] [clang][aarch64] Precondition isHomogeneousAggregate on isCXX14Aggregate MSVC on WoA64 includes isCXX14Aggregate in its definition. This is de-facto specification on that platform, so match msvc's behaviour. 
Fixes: https://bugs.llvm.org/show_bug.cgi?id=47611 Co-authored-by: Peter Waller Differential Revision: https://reviews.llvm.org/D92751 --- clang/lib/CodeGen/CGCXXABI.h | 7 ++ clang/lib/CodeGen/MicrosoftCXXABI.cpp | 16 +++- clang/lib/CodeGen/TargetInfo.cpp | 6 +- .../CodeGenCXX/homogeneous-aggregates.cpp | 69 +++++++++++++++++ .../CodeGen/AArch64/arm64-windows-calls.ll | 77 +++++++++++++++++++ 5 files changed, 172 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h index 171428a3525d0b..ea839db7528ebf 100644 --- a/clang/lib/CodeGen/CGCXXABI.h +++ b/clang/lib/CodeGen/CGCXXABI.h @@ -146,6 +146,13 @@ class CGCXXABI { /// 'this' parameter of C++ instance methods. virtual bool isSRetParameterAfterThis() const { return false; } + /// Returns true if the ABI permits the argument to be a homogeneous + /// aggregate. + virtual bool + isPermittedToBeHomogeneousAggregate(const CXXRecordDecl *RD) const { + return true; + }; + /// Find the LLVM type used to represent the given member pointer /// type. virtual llvm::Type * diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index c16c72dc93d5c1..cb0dc1d5d7173b 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -771,6 +771,9 @@ class MicrosoftCXXABI : public CGCXXABI { LoadVTablePtr(CodeGenFunction &CGF, Address This, const CXXRecordDecl *RD) override; + virtual bool + isPermittedToBeHomogeneousAggregate(const CXXRecordDecl *RD) const override; + private: typedef std::pair VFTableIdTy; typedef llvm::DenseMap VTablesMapTy; @@ -1070,7 +1073,7 @@ bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const { return isDeletingDtor(GD); } -static bool isCXX14Aggregate(const CXXRecordDecl *RD) { +static bool isTrivialForAArch64MSVC(const CXXRecordDecl *RD) { // For AArch64, we use the C++14 definition of an aggregate, so we also // check for: // No private or protected non static data members. 
@@ -1107,7 +1110,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { bool isTrivialForABI = RD->isPOD(); bool isAArch64 = CGM.getTarget().getTriple().isAArch64(); if (isAArch64) - isTrivialForABI = RD->canPassInRegisters() && isCXX14Aggregate(RD); + isTrivialForABI = RD->canPassInRegisters() && isTrivialForAArch64MSVC(RD); // MSVC always returns structs indirectly from C++ instance methods. bool isIndirectReturn = !isTrivialForABI || FI.isInstanceMethod(); @@ -4358,3 +4361,12 @@ MicrosoftCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This, performBaseAdjustment(CGF, This, QualType(RD->getTypeForDecl(), 0)); return {CGF.GetVTablePtr(This, CGM.Int8PtrTy, RD), RD}; } + +bool MicrosoftCXXABI::isPermittedToBeHomogeneousAggregate( + const CXXRecordDecl *CXXRD) const { + // MSVC Windows on Arm64 considers a type not HFA if it is not an + // aggregate according to the C++14 spec. This is not consistent with the + // AAPCS64, but is defacto spec on that platform. + return !CGM.getTarget().getTriple().isAArch64() || + isTrivialForAArch64MSVC(CXXRD); +} diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index d36c7344e28434..9a11a0720f3cb1 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -5065,8 +5065,12 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, Members = 0; - // If this is a C++ record, check the bases first. + // If this is a C++ record, check the properties of the record such as + // bases and ABI specific restrictions if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { + if (!getCXXABI().isPermittedToBeHomogeneousAggregate(CXXRD)) + return false; + for (const auto &I : CXXRD->bases()) { // Ignore empty records. 
if (isEmptyRecord(getContext(), I.getType(), true)) diff --git a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp index 2b3af4226407c5..0fa30b2663bfc4 100644 --- a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp +++ b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -mfloat-abi hard -triple armv7-unknown-linux-gnueabi -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM32 // RUN: %clang_cc1 -mfloat-abi hard -triple aarch64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM64 // RUN: %clang_cc1 -mfloat-abi hard -triple x86_64-unknown-windows-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=X64 +// RUN: %clang_cc1 -mfloat-abi hard -triple aarch64-unknown-windows-msvc -emit-llvm -o - %s | FileCheck %s --check-prefix=WOA64 #if defined(__x86_64__) #define CC __attribute__((vectorcall)) @@ -104,3 +105,71 @@ struct HVAWithEmptyBitField : Float1, Float2 { // ARM32: define{{.*}} arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce) // X64: define dso_local x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(%struct.HVAWithEmptyBitField inreg %a.coerce) void CC with_empty_bitfield(HVAWithEmptyBitField a) {} + +namespace pr47611 { +// MSVC on Arm includes "isCXX14Aggregate" as part of its definition of +// Homogeneous Floating-point Aggregate (HFA). Additionally, it has a different +// handling of C++14 aggregates, which can lead to confusion. + +// Pod is a trivial HFA. +struct Pod { + double b[2]; +}; +// Not an aggregate according to C++14 spec => not HFA according to MSVC. +struct NotCXX14Aggregate { + NotCXX14Aggregate(); + Pod p; +}; +// NotPod is a C++14 aggregate. But not HFA, because it contains +// NotCXX14Aggregate (which itself is not HFA because it's not a C++14 +// aggregate). 
+struct NotPod { + NotCXX14Aggregate x; +}; +struct Empty {}; +// A class with a base is returned using the sret calling convetion by MSVC. +struct HasEmptyBase : public Empty { + double b[2]; +}; +struct HasPodBase : public Pod {}; +// WOA64-LABEL: define dso_local %"struct.pr47611::Pod" @"?copy@pr47611@@YA?AUPod@1@PEAU21@@Z"(%"struct.pr47611::Pod"* %x) +Pod copy(Pod *x) { return *x; } // MSVC: ldp d0,d1,[x0], Clang: ldp d0,d1,[x0] +// WOA64-LABEL: define dso_local void @"?copy@pr47611@@YA?AUNotCXX14Aggregate@1@PEAU21@@Z"(%"struct.pr47611::NotCXX14Aggregate"* inreg noalias sret(%"struct.pr47611::NotCXX14Aggregate") align 8 %agg.result, %"struct.pr47611::NotCXX14Aggregate"* %x) +NotCXX14Aggregate copy(NotCXX14Aggregate *x) { return *x; } // MSVC: stp x8,x9,[x0], Clang: str q0,[x0] +// WOA64-LABEL: define dso_local [2 x i64] @"?copy@pr47611@@YA?AUNotPod@1@PEAU21@@Z"(%"struct.pr47611::NotPod"* %x) +NotPod copy(NotPod *x) { return *x; } +// WOA64-LABEL: define dso_local void @"?copy@pr47611@@YA?AUHasEmptyBase@1@PEAU21@@Z"(%"struct.pr47611::HasEmptyBase"* inreg noalias sret(%"struct.pr47611::HasEmptyBase") align 8 %agg.result, %"struct.pr47611::HasEmptyBase"* %x) +HasEmptyBase copy(HasEmptyBase *x) { return *x; } +// WOA64-LABEL: define dso_local void @"?copy@pr47611@@YA?AUHasPodBase@1@PEAU21@@Z"(%"struct.pr47611::HasPodBase"* inreg noalias sret(%"struct.pr47611::HasPodBase") align 8 %agg.result, %"struct.pr47611::HasPodBase"* %x) +HasPodBase copy(HasPodBase *x) { return *x; } + +void call_copy_pod(Pod *pod) { + *pod = copy(pod); + // WOA64-LABEL: define dso_local void @"?call_copy_pod@pr47611@@YAXPEAUPod@1@@Z" + // WOA64: %{{.*}} = call %"struct.pr47611::Pod" @"?copy@pr47611@@YA?AUPod@1@PEAU21@@Z"(%"struct.pr47611::Pod"* %{{.*}}) +} + +void call_copy_notcxx14aggregate(NotCXX14Aggregate *notcxx14aggregate) { + *notcxx14aggregate = copy(notcxx14aggregate); + // WOA64-LABEL: define dso_local void @"?call_copy_notcxx14aggregate@pr47611@@YAXPEAUNotCXX14Aggregate@1@@Z" + // 
WOA64: call void @"?copy@pr47611@@YA?AUNotCXX14Aggregate@1@PEAU21@@Z"(%"struct.pr47611::NotCXX14Aggregate"* inreg sret(%"struct.pr47611::NotCXX14Aggregate") align 8 %{{.*}}, %"struct.pr47611::NotCXX14Aggregate"* %{{.*}}) +} + +void call_copy_notpod(NotPod *notPod) { + *notPod = copy(notPod); + // WOA64-LABEL: define dso_local void @"?call_copy_notpod@pr47611@@YAXPEAUNotPod@1@@Z" + // WOA64: %{{.*}} = call [2 x i64] @"?copy@pr47611@@YA?AUNotPod@1@PEAU21@@Z"(%"struct.pr47611::NotPod"* %{{.*}}) +} + +void call_copy_hasemptybase(HasEmptyBase *hasEmptyBase) { + *hasEmptyBase = copy(hasEmptyBase); + // WOA64-LABEL: define dso_local void @"?call_copy_hasemptybase@pr47611@@YAXPEAUHasEmptyBase@1@@Z" + // WOA64: call void @"?copy@pr47611@@YA?AUHasEmptyBase@1@PEAU21@@Z"(%"struct.pr47611::HasEmptyBase"* inreg sret(%"struct.pr47611::HasEmptyBase") align 8 %{{.*}}, %"struct.pr47611::HasEmptyBase"* %{{.*}}) +} + +void call_copy_haspodbase(HasPodBase *hasPodBase) { + *hasPodBase = copy(hasPodBase); + // WOA64-LABEL: define dso_local void @"?call_copy_haspodbase@pr47611@@YAXPEAUHasPodBase@1@@Z" + // WOA64: call void @"?copy@pr47611@@YA?AUHasPodBase@1@PEAU21@@Z"(%"struct.pr47611::HasPodBase"* inreg sret(%"struct.pr47611::HasPodBase") align 8 %{{.*}}, %"struct.pr47611::HasPodBase"* %{{.*}}) +} +}; // namespace pr47611 diff --git a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll index 1fee2246b75109..47aefd3589140d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll +++ b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll @@ -98,3 +98,80 @@ entry: %this1 = load %class.C*, %class.C** %this.addr, align 8 ret void } + +; The following tests correspond to tests in +; clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp + +; Pod is a trivial HFA +%struct.Pod = type { [2 x double] } +; Not an aggregate according to C++14 spec => not HFA according to MSVC +%struct.NotCXX14Aggregate = type { %struct.Pod } +; NotPod is a C++14 
aggregate. But not HFA, because it contains +; NotCXX14Aggregate (which itself is not HFA because it's not a C++14 +; aggregate). +%struct.NotPod = type { %struct.NotCXX14Aggregate } + +; CHECK-LABEL: copy_pod: +define dso_local %struct.Pod @copy_pod(%struct.Pod* %x) { + %x1 = load %struct.Pod, %struct.Pod* %x, align 8 + ret %struct.Pod %x1 + ; CHECK: ldp d0, d1, [x0] +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + +; CHECK-LABEL: copy_notcxx14aggregate: +define dso_local void +@copy_notcxx14aggregate(%struct.NotCXX14Aggregate* inreg noalias sret(%struct.NotCXX14Aggregate) align 8 %agg.result, + %struct.NotCXX14Aggregate* %x) { + %1 = bitcast %struct.NotCXX14Aggregate* %agg.result to i8* + %2 = bitcast %struct.NotCXX14Aggregate* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 %2, i64 16, i1 false) + ret void + ; CHECK: str q0, [x0] +} + +; CHECK-LABEL: copy_notpod: +define dso_local [2 x i64] @copy_notpod(%struct.NotPod* %x) { + %x1 = bitcast %struct.NotPod* %x to [2 x i64]* + %x2 = load [2 x i64], [2 x i64]* %x1 + ret [2 x i64] %x2 + ; CHECK: ldp x8, x1, [x0] + ; CHECK: mov x0, x8 +} + +@Pod = external global %struct.Pod + +; CHECK-LABEL: call_copy_pod: +define void @call_copy_pod() { + %x = call %struct.Pod @copy_pod(%struct.Pod* @Pod) + store %struct.Pod %x, %struct.Pod* @Pod + ret void + ; CHECK: bl copy_pod + ; CHECK-NEXT: stp d0, d1, [{{.*}}] +} + +@NotCXX14Aggregate = external global %struct.NotCXX14Aggregate + +; CHECK-LABEL: call_copy_notcxx14aggregate: +define void @call_copy_notcxx14aggregate() { + %x = alloca %struct.NotCXX14Aggregate + call void @copy_notcxx14aggregate(%struct.NotCXX14Aggregate* %x, %struct.NotCXX14Aggregate* @NotCXX14Aggregate) + %x1 = load %struct.NotCXX14Aggregate, %struct.NotCXX14Aggregate* %x + store %struct.NotCXX14Aggregate %x1, %struct.NotCXX14Aggregate* @NotCXX14Aggregate + ret void + ; CHECK: bl copy_notcxx14aggregate 
+ ; CHECK-NEXT: ldp {{.*}}, {{.*}}, [sp] +} + +@NotPod = external global %struct.NotPod + +; CHECK-LABEL: call_copy_notpod: +define void @call_copy_notpod() { + %x = call [2 x i64] @copy_notpod(%struct.NotPod* @NotPod) + %notpod = bitcast %struct.NotPod* @NotPod to [2 x i64]* + store [2 x i64] %x, [2 x i64]* %notpod + ret void + ; CHECK: bl copy_notpod + ; CHECK-NEXT: stp x0, x1, [{{.*}}] +} From 6cd44b204c6c6f2e915270af6792f247c4c23abc Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 12 Jan 2021 19:55:17 +0000 Subject: [PATCH 60/86] [FunctionAttrs] Derive willreturn for fns with readonly` & `mustprogress`. Similar to D94125, derive `willreturn` for functions that are `readonly` and `mustprogress` in FunctionAttrs. To quote the reasoning from D94125: Since D86233 we have `mustprogress` which, in combination with `readonly`, implies `willreturn`. The idea is that every side-effect has to be modeled as a "write". Consequently, `readonly` means there is no side-effect, and `mustprogress` guarantees that we cannot "loop" forever without side-effect. Reviewed By: jdoerfert, nikic Differential Revision: https://reviews.llvm.org/D94502 --- llvm/include/llvm/IR/Function.h | 4 ++++ llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 18 +++++++++++++++ .../Transforms/FunctionAttrs/willreturn.ll | 22 ++++++++----------- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 019e3a98a1af5f..7e209bb3769be1 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -640,6 +640,10 @@ class Function : public GlobalObject, public ilist_node { } void setMustProgress() { addFnAttr(Attribute::MustProgress); } + /// Determine if the function will return. 
+ bool willReturn() const { return hasFnAttribute(Attribute::WillReturn); } + void setWillReturn() { addFnAttr(Attribute::WillReturn); } + /// True if the ABI mandates (or the user requested) that this /// function be in a unwind table. bool hasUWTable() const { diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 5cf5e9463b451c..2e24cad1393b1d 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -77,6 +77,7 @@ STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); STATISTIC(NumNoFree, "Number of functions marked as nofree"); +STATISTIC(NumWillReturn, "Number of functions marked as willreturn"); static cl::opt EnableNonnullArgPropagation( "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, @@ -1424,6 +1425,22 @@ static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) { return Changed; } +// Set the willreturn function attribute if possible. +static bool addWillReturn(const SCCNodeSet &SCCNodes) { + bool Changed = false; + + for (Function *F : SCCNodes) { + if (!F || !F->onlyReadsMemory() || !F->mustProgress() || F->willReturn()) + continue; + + F->setWillReturn(); + NumWillReturn++; + Changed = true; + } + + return Changed; +} + static SCCNodesResult createSCCNodeSet(ArrayRef Functions) { SCCNodesResult Res; Res.HasUnknownCall = false; @@ -1468,6 +1485,7 @@ static bool deriveAttrsInPostOrder(ArrayRef Functions, Changed |= addArgumentAttrs(Nodes.SCCNodes); Changed |= inferConvergent(Nodes.SCCNodes); Changed |= addNoReturnAttrs(Nodes.SCCNodes); + Changed |= addWillReturn(Nodes.SCCNodes); // If we have no external nodes participating in the SCC, we can deduce some // more precise attributes as well. 
diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn.ll b/llvm/test/Transforms/FunctionAttrs/willreturn.ll index 56ca12638e9cc7..d92151c299fecf 100644 --- a/llvm/test/Transforms/FunctionAttrs/willreturn.ll +++ b/llvm/test/Transforms/FunctionAttrs/willreturn.ll @@ -1,9 +1,8 @@ ; RUN: opt -function-attrs -S %s | FileCheck %s -; TODO define void @mustprogress_readnone() mustprogress { -; CHECK-NOT: Function Attrs: {{.*}} willreturn -; CHECK: define void @mustprogress_readnone() +; CHECK: Function Attrs: {{.*}} noreturn {{.*}} readnone willreturn +; CHECK-NEXT: define void @mustprogress_readnone() ; entry: br label %while.body @@ -12,10 +11,9 @@ while.body: br label %while.body } -; TODO define i32 @mustprogress_load(i32* %ptr) mustprogress { -; CHECK-NOT: Function Attrs: {{.*}} willreturn -; CHECK: define i32 @mustprogress_load( +; CHECK: Function Attrs: {{.*}} readonly willreturn +; CHECK-NEXT: define i32 @mustprogress_load( ; entry: %r = load i32, i32* %ptr @@ -35,16 +33,15 @@ declare void @unknown_fn() define void @mustprogress_call_unknown_fn() mustprogress { ; CHECK-NOT: Function Attrs: {{.*}} willreturn -; CHECK: define void @mustprogress_call_unknown_fn( +; CHECK: define void @mustprogress_call_unknown_fn( ; call void @unknown_fn() ret void } -; TODO define i32 @mustprogress_call_known_functions(i32* %ptr) mustprogress { -; CHECK-NOT: Function Attrs: {{.*}} willreturn -; CHECK: define i32 @mustprogress_call_known_functions( +; CHECK: Function Attrs: {{.*}} readonly willreturn +; CHECK-NEXT: define i32 @mustprogress_call_known_functions( ; call void @mustprogress_readnone() %r = call i32 @mustprogress_load(i32* %ptr) @@ -53,10 +50,9 @@ define i32 @mustprogress_call_known_functions(i32* %ptr) mustprogress { declare i32 @__gxx_personality_v0(...) 
-; TODO define i64 @mustprogress_mayunwind() mustprogress personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-NOT: Function Attrs: {{.*}} willreturn -; CHECK: define i64 @mustprogress_mayunwind( +; CHECK: Function Attrs: {{.*}} readnone willreturn +; CHECK-NEXT: define i64 @mustprogress_mayunwind( ; %a = invoke i64 @fn_noread() to label %A unwind label %B From e53bbd99516fc7b612df1ae08d48288d0b8784ea Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 12 Jan 2021 11:22:31 -0800 Subject: [PATCH 61/86] [IR] move nomerge attribute from function declaration/definition to callsites Move nomerge attribute from function declaration/definition to callsites to allow virtual function calls attach the attribute. Differential Revision: https://reviews.llvm.org/D94537 --- clang/lib/CodeGen/CGCall.cpp | 16 ++++----- clang/lib/CodeGen/CodeGenModule.cpp | 3 -- clang/test/CodeGen/attr-nomerge.cpp | 54 ++++++++++++++--------------- 3 files changed, 35 insertions(+), 38 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 2cc7203d119465..42801372189b17 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1985,7 +1985,9 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoReturn); NBA = Fn->getAttr(); } - if (!AttrOnCallSite && TargetDecl->hasAttr()) + // Only place nomerge attribute on call sites, never functions. This + // allows it to work on indirect virtual function calls. + if (AttrOnCallSite && TargetDecl->hasAttr()) FuncAttrs.addAttribute(llvm::Attribute::NoMerge); } @@ -5018,13 +5020,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::StrictFP); - // Add nomerge attribute to the call-site if the callee function doesn't have - // the attribute. 
- if (const FunctionDecl *FD = dyn_cast_or_null(TargetDecl)) - if (!FD->hasAttr() && InNoMergeAttributedStmt) - Attrs = Attrs.addAttribute(getLLVMContext(), - llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoMerge); + // Add call-site nomerge attribute if exists. + if (InNoMergeAttributedStmt) + Attrs = + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoMerge); // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index da5b03b138bf9d..bee51715bdc615 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1772,9 +1772,6 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::MinSize); } - if (D->hasAttr()) - B.addAttribute(llvm::Attribute::NoMerge); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); diff --git a/clang/test/CodeGen/attr-nomerge.cpp b/clang/test/CodeGen/attr-nomerge.cpp index d93f4a7c96d6e8..fc26af379fdb79 100644 --- a/clang/test/CodeGen/attr-nomerge.cpp +++ b/clang/test/CodeGen/attr-nomerge.cpp @@ -3,7 +3,7 @@ class A { public: [[clang::nomerge]] A(); - [[clang::nomerge]] ~A(); + [[clang::nomerge]] virtual ~A(); [[clang::nomerge]] void f(); [[clang::nomerge]] virtual void g(); [[clang::nomerge]] static void f1(); @@ -14,14 +14,14 @@ class B : public A { void g() override; }; -[[clang::nomerge]] bool bar(); +bool bar(); [[clang::nomerge]] void f(bool, bool); void foo(int i, A *ap, B *bp) { [[clang::nomerge]] bar(); [[clang::nomerge]] (i = 4, bar()); [[clang::nomerge]] (void)(bar()); - [[clang::nomerge]] f(bar(), bar()); + f(bar(), bar()); [[clang::nomerge]] [] { bar(); bar(); }(); // nomerge only applies to the anonymous function call [[clang::nomerge]] for (bar(); bar(); bar()) {} [[clang::nomerge]] { 
asm("nop"); } @@ -37,6 +37,9 @@ void foo(int i, A *ap, B *bp) { B b; b.g(); + + A *newA = new B(); + delete newA; } int g(int i); @@ -57,37 +60,34 @@ void something_else_again() { g(1); } +// CHECK: call zeroext i1 @_Z3barv() #[[ATTR0:[0-9]+]] +// CHECK: call zeroext i1 @_Z3barv() #[[ATTR0]] +// CHECK: call zeroext i1 @_Z3barv() #[[ATTR0]] // CHECK: call zeroext i1 @_Z3barv(){{$}} // CHECK: call zeroext i1 @_Z3barv(){{$}} -// CHECK: call zeroext i1 @_Z3barv(){{$}} -// CHECK: call zeroext i1 @_Z3barv(){{$}} -// CHECK: call zeroext i1 @_Z3barv(){{$}} -// CHECK: call void @_Z1fbb({{.*}}){{$}} -// CHECK: call void @"_ZZ3fooiP1AP1BENK3$_0clEv"{{.*}} #[[ATTR0:[0-9]+]] -// CHECK: call zeroext i1 @_Z3barv(){{$}} -// CHECK: call zeroext i1 @_Z3barv(){{$}} -// CHECK: call zeroext i1 @_Z3barv(){{$}} +// CHECK: call void @_Z1fbb({{.*}}) #[[ATTR0]] +// CHECK: call void @"_ZZ3fooiP1AP1BENK3$_0clEv"{{.*}} #[[ATTR0]] +// CHECK: call zeroext i1 @_Z3barv() #[[ATTR0]] +// CHECK-LABEL: for.cond: +// CHECK: call zeroext i1 @_Z3barv() #[[ATTR0]] +// CHECK-LABEL: for.inc: +// CHECK: call zeroext i1 @_Z3barv() #[[ATTR0]] // CHECK: call void asm sideeffect "nop"{{.*}} #[[ATTR1:[0-9]+]] // CHECK: call zeroext i1 @_Z3barv(){{$}} // CHECK: %[[AG:.*]] = load void (%class.A*)*, void (%class.A*)** -// CHECK-NEXT: call void %[[AG]](%class.A* nonnull dereferenceable +// CHECK-NEXT: call void %[[AG]](%class.A* {{.*}}) #[[ATTR0]] // CHECK: %[[BG:.*]] = load void (%class.B*)*, void (%class.B*)** // CHECK-NEXT: call void %[[BG]](%class.B* nonnull dereferenceable - - -// CHECK-DAG: declare zeroext i1 @_Z3barv() #[[ATTR2:[0-9]+]] -// CHECK-DAG: declare void @_Z1fbb(i1 zeroext, i1 zeroext) #[[ATTR2]] -// CHECK-DAG: declare void @_ZN1AC1Ev{{.*}} #[[ATTR2]] -// CHECK-DAG: declare void @_ZN1A1fEv{{.*}} #[[ATTR2]] -// CHECK-DAG: declare void @_ZN1A1gEv{{.*}} #[[ATTR2]] -// CHECK-DAG: declare void @_ZN1A2f1Ev{{.*}} #[[ATTR2]] -// CHECK-DAG: declare void @_ZN1AC2Ev{{.*}} #[[ATTR2]] -// CHECK-DAG: declare void 
@_ZN1AD1Ev{{.*}} #[[ATTR3:[0-9]+]] -// CHECK-DAG: declare void @_ZN1AD2Ev{{.*}} #[[ATTR3]] -// CHECK-DAG: define{{.*}} i32 @_Z1gi(i32 %i) #[[ATTR4:[0-9]+]] { +// CHECK: call void @_ZN1AC1Ev({{.*}}) #[[ATTR0]] +// CHECK: call void @_ZN1A1fEv({{.*}}) #[[ATTR0]] +// CHECK: call void @_ZN1A1gEv({{.*}}) #[[ATTR0]] +// CHECK: call void @_ZN1A2f1Ev() #[[ATTR0]] +// CHECK: call void @_ZN1BC1Ev({{.*}}){{$}} +// CHECK: call void @_ZN1B1gEv({{.*}}){{$}} +// CHECK: call void @_ZN1BC1Ev({{.*}}){{$}} +// CHECK: %[[AG:.*]] = load void (%class.A*)*, void (%class.A*)** +// CHECK-NEXT: call void %[[AG]](%class.A* {{.*}}) #[[ATTR1]] +// CHECK: call void @_ZN1AD1Ev(%class.A* {{.*}}) #[[ATTR1]] // CHECK-DAG: attributes #[[ATTR0]] = {{{.*}}nomerge{{.*}}} // CHECK-DAG: attributes #[[ATTR1]] = {{{.*}}nomerge{{.*}}} -// CHECK-DAG: attributes #[[ATTR2]] = {{{.*}}nomerge{{.*}}} -// CHECK-DAG: attributes #[[ATTR3]] = {{{.*}}nomerge{{.*}}} -// CHECK-DAG: attributes #[[ATTR4]] = {{{.*}}nomerge{{.*}}} From 922a5b894114defb5302e514973de8c9cd23af6a Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Mon, 11 Jan 2021 22:28:17 +0000 Subject: [PATCH 62/86] [clang-tidy] Add test for Transformer-based checks with diagnostics. Adds a test that checks the diagnostic output of the tidy. 
Differential Revision: https://reviews.llvm.org/D94453 --- .../TransformerClangTidyCheckTest.cpp | 38 +++++++++++++++---- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp b/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp index e8df4bb6007187..24b6bea98787a0 100644 --- a/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp @@ -10,8 +10,10 @@ #include "ClangTidyTest.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Tooling/Transformer/RangeSelector.h" +#include "clang/Tooling/Transformer/RewriteRule.h" #include "clang/Tooling/Transformer/Stencil.h" #include "clang/Tooling/Transformer/Transformer.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" namespace clang { @@ -25,20 +27,21 @@ using transformer::change; using transformer::IncludeFormat; using transformer::makeRule; using transformer::node; +using transformer::noopEdit; using transformer::RewriteRule; +using transformer::RootID; using transformer::statement; // Invert the code of an if-statement, while maintaining its semantics. 
RewriteRule invertIf() { StringRef C = "C", T = "T", E = "E"; - RewriteRule Rule = - makeRule(ifStmt(hasCondition(expr().bind(C)), hasThen(stmt().bind(T)), - hasElse(stmt().bind(E))), - change(statement(std::string(RewriteRule::RootID)), - cat("if(!(", node(std::string(C)), ")) ", - statement(std::string(E)), " else ", - statement(std::string(T)))), - cat("negate condition and reverse `then` and `else` branches")); + RewriteRule Rule = makeRule( + ifStmt(hasCondition(expr().bind(C)), hasThen(stmt().bind(T)), + hasElse(stmt().bind(E))), + change(statement(RootID), cat("if(!(", node(std::string(C)), ")) ", + statement(std::string(E)), " else ", + statement(std::string(T)))), + cat("negate condition and reverse `then` and `else` branches")); return Rule; } @@ -68,6 +71,25 @@ TEST(TransformerClangTidyCheckTest, Basic) { EXPECT_EQ(Expected, test::runCheckOnCode(Input)); } +TEST(TransformerClangTidyCheckTest, DiagnosticsCorrectlyGenerated) { + class DiagOnlyCheck : public TransformerClangTidyCheck { + public: + DiagOnlyCheck(StringRef Name, ClangTidyContext *Context) + : TransformerClangTidyCheck( + makeRule(returnStmt(), noopEdit(node(RootID)), cat("message")), + Name, Context) {} + }; + std::string Input = "int h() { return 5; }"; + std::vector Errors; + EXPECT_EQ(Input, test::runCheckOnCode(Input, &Errors)); + EXPECT_EQ(Errors.size(), 1U); + EXPECT_EQ(Errors[0].Message.Message, "message"); + EXPECT_THAT(Errors[0].Ranges, testing::IsEmpty()); + + // The diagnostic is anchored to the match, "return 5". 
+ EXPECT_EQ(Errors[0].Message.FileOffset, 10U); +} + class IntLitCheck : public TransformerClangTidyCheck { public: IntLitCheck(StringRef Name, ClangTidyContext *Context) From d49974f9c98ebce5a679eced9f27add138b881fa Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 12 Jan 2021 21:21:22 +0100 Subject: [PATCH 63/86] [InstCombine] Regenerate test checks (NFC) --- .../InstCombine/2006-12-15-Range-Test.ll | 45 ++++--- .../InstCombine/2007-03-13-CompareMerge.ll | 16 ++- .../InstCombine/2007-05-10-icmp-or.ll | 16 ++- .../InstCombine/2007-11-15-CompareMiscomp.ll | 16 ++- .../InstCombine/2008-01-13-AndCmpCmp.ll | 18 ++- .../InstCombine/2008-02-28-OrFCmpCrash.ll | 27 +++- .../InstCombine/2008-06-21-CompareMiscomp.ll | 16 ++- .../Transforms/InstCombine/2008-08-05-And.ll | 41 ++++-- .../Transforms/InstCombine/2012-02-28-ICmp.ll | 17 ++- .../InstCombine/2012-03-10-InstCombine.ll | 27 +++- .../Transforms/InstCombine/range-check.ll | 124 +++++++++++------- 11 files changed, 242 insertions(+), 121 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll index 784b3e4fe6875c..38f6523bec397f 100644 --- a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll +++ b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll @@ -1,31 +1,44 @@ -; RUN: opt < %s -instcombine -S | \ -; RUN: grep icmp | count 1 -; RUN: opt < %s -instcombine -S | \ -; RUN: grep "icmp ugt" | count 1 -; END. 
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-p:32:32" target triple = "i686-pc-linux-gnu" @r = external global [17 x i32] ; <[17 x i32]*> [#uses=1] define i1 @print_pgm_cond_true(i32 %tmp12.reload, i32* %tmp16.out) { +; CHECK-LABEL: @print_pgm_cond_true( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[COND_TRUE:%.*]] +; CHECK: bb27.exitStub: +; CHECK-NEXT: store i32 [[TMP16:%.*]], i32* [[TMP16_OUT:%.*]], align 4 +; CHECK-NEXT: ret i1 true +; CHECK: cond_next23.exitStub: +; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP16_OUT]], align 4 +; CHECK-NEXT: ret i1 false +; CHECK: cond_true: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 [[TMP12_RELOAD:%.*]] +; CHECK-NEXT: [[TMP16]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16_OFF:%.*]] = add i32 [[TMP16]], 31 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[TMP16_OFF]], 62 +; CHECK-NEXT: br i1 [[TMP0]], label [[BB27_EXITSTUB:%.*]], label [[COND_NEXT23_EXITSTUB:%.*]] +; newFuncRoot: - br label %cond_true + br label %cond_true bb27.exitStub: ; preds = %cond_true - store i32 %tmp16, i32* %tmp16.out - ret i1 true + store i32 %tmp16, i32* %tmp16.out + ret i1 true cond_next23.exitStub: ; preds = %cond_true - store i32 %tmp16, i32* %tmp16.out - ret i1 false + store i32 %tmp16, i32* %tmp16.out + ret i1 false cond_true: ; preds = %newFuncRoot - %tmp15 = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 %tmp12.reload ; [#uses=1] - %tmp16 = load i32, i32* %tmp15 ; [#uses=4] - %tmp18 = icmp slt i32 %tmp16, -31 ; [#uses=1] - %tmp21 = icmp sgt i32 %tmp16, 31 ; [#uses=1] - %bothcond = or i1 %tmp18, %tmp21 ; [#uses=1] - br i1 %bothcond, label %bb27.exitStub, label %cond_next23.exitStub + %tmp15 = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 %tmp12.reload ; [#uses=1] + %tmp16 = load i32, i32* %tmp15 ; [#uses=4] + %tmp18 = icmp slt i32 %tmp16, -31 ; [#uses=1] + %tmp21 = icmp sgt 
i32 %tmp16, 31 ; [#uses=1] + %bothcond = or i1 %tmp18, %tmp21 ; [#uses=1] + br i1 %bothcond, label %bb27.exitStub, label %cond_next23.exitStub } diff --git a/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll b/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll index 826d68aefc1a68..6db886b25ede40 100644 --- a/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll +++ b/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll @@ -1,9 +1,15 @@ -; RUN: opt < %s -instcombine -S | grep "icmp sle" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + ; PR1244 define i1 @test(i32 %c.3.i, i32 %d.292.2.i) { - %tmp266.i = icmp slt i32 %c.3.i, %d.292.2.i - %tmp276.i = icmp eq i32 %c.3.i, %d.292.2.i - %sel_tmp80 = or i1 %tmp266.i, %tmp276.i - ret i1 %sel_tmp80 +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sle i32 [[C_3_I:%.*]], [[D_292_2_I:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %tmp266.i = icmp slt i32 %c.3.i, %d.292.2.i + %tmp276.i = icmp eq i32 %c.3.i, %d.292.2.i + %sel_tmp80 = or i1 %tmp266.i, %tmp276.i + ret i1 %sel_tmp80 } diff --git a/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll b/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll index 4af5dfeef5ddb9..fb9b9a6cb5af73 100644 --- a/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll +++ b/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll @@ -1,8 +1,14 @@ -; RUN: opt < %s -instcombine -disable-output +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + define i1 @test(i32 %tmp9) { - %tmp20 = icmp ugt i32 %tmp9, 255 ; [#uses=1] - %tmp11.not = icmp sgt i32 %tmp9, 255 ; [#uses=1] - %bothcond = or i1 %tmp20, %tmp11.not ; [#uses=1] - ret i1 %bothcond +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP20:%.*]] = icmp ugt i32 [[TMP9:%.*]], 255 +; CHECK-NEXT: ret i1 [[TMP20]] +; + %tmp20 = icmp ugt i32 %tmp9, 255 ; [#uses=1] + 
%tmp11.not = icmp sgt i32 %tmp9, 255 ; [#uses=1] + %bothcond = or i1 %tmp20, %tmp11.not ; [#uses=1] + ret i1 %bothcond } diff --git a/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll b/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll index 6b83dd982dc891..f872b6a98e096b 100644 --- a/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll +++ b/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll @@ -1,10 +1,16 @@ -; RUN: opt < %s -instcombine -S | grep "icmp eq i32 %In, 1" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + ; PR1800 define i1 @test(i32 %In) { - %c1 = icmp sgt i32 %In, -1 - %c2 = icmp eq i32 %In, 1 - %V = and i1 %c1, %c2 - ret i1 %V +; CHECK-LABEL: @test( +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[IN:%.*]], 1 +; CHECK-NEXT: ret i1 [[C2]] +; + %c1 = icmp sgt i32 %In, -1 + %c2 = icmp eq i32 %In, 1 + %V = and i1 %c1, %c2 + ret i1 %V } diff --git a/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll b/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll index fbc8ba972a032c..eb3329b4dd02bf 100644 --- a/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll +++ b/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll @@ -1,9 +1,17 @@ -; RUN: opt < %s -instcombine -S | grep and +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + ; PR1907 define i1 @test(i32 %c84.17) { - %tmp2696 = icmp ne i32 %c84.17, 34 ; [#uses=2] - %tmp2699 = icmp sgt i32 %c84.17, -1 ; [#uses=1] - %tmp2703 = and i1 %tmp2696, %tmp2699 ; [#uses=1] - ret i1 %tmp2703 +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP2696:%.*]] = icmp ne i32 [[C84_17:%.*]], 34 +; CHECK-NEXT: [[TMP2699:%.*]] = icmp sgt i32 [[C84_17]], -1 +; CHECK-NEXT: [[TMP2703:%.*]] = and i1 [[TMP2696]], [[TMP2699]] +; CHECK-NEXT: ret i1 [[TMP2703]] +; + %tmp2696 = icmp ne i32 %c84.17, 34 ; [#uses=2] + %tmp2699 = icmp sgt i32 %c84.17, 
-1 ; [#uses=1] + %tmp2703 = and i1 %tmp2696, %tmp2699 ; [#uses=1] + ret i1 %tmp2703 } diff --git a/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll b/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll index 7f8bd4fb8a90ff..7b08a7b3a02577 100644 --- a/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll +++ b/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll @@ -1,16 +1,29 @@ -; RUN: opt < %s -instcombine | llvm-dis +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + ; rdar://5771353 define float @test(float %x, x86_fp80 %y) nounwind readonly { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP67:%.*]] = fcmp uno x86_fp80 [[Y:%.*]], 0xK00000000000000000000 +; CHECK-NEXT: [[TMP71:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: [[BOTHCOND:%.*]] = or i1 [[TMP67]], [[TMP71]] +; CHECK-NEXT: br i1 [[BOTHCOND]], label [[BB74:%.*]], label [[BB80:%.*]] +; CHECK: bb74: +; CHECK-NEXT: ret float 0.000000e+00 +; CHECK: bb80: +; CHECK-NEXT: ret float 0.000000e+00 +; entry: - %tmp67 = fcmp uno x86_fp80 %y, 0xK00000000000000000000 ; [#uses=1] - %tmp71 = fcmp uno float %x, 0.000000e+00 ; [#uses=1] - %bothcond = or i1 %tmp67, %tmp71 ; [#uses=1] - br i1 %bothcond, label %bb74, label %bb80 + %tmp67 = fcmp uno x86_fp80 %y, 0xK00000000000000000000 ; [#uses=1] + %tmp71 = fcmp uno float %x, 0.000000e+00 ; [#uses=1] + %bothcond = or i1 %tmp67, %tmp71 ; [#uses=1] + br i1 %bothcond, label %bb74, label %bb80 bb74: ; preds = %entry - ret float 0.000000e+00 + ret float 0.000000e+00 bb80: ; preds = %entry - ret float 0.000000e+00 + ret float 0.000000e+00 } diff --git a/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll b/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll index 80bd83bc6ba0d9..11226bcf41855b 100644 --- a/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll +++ 
b/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll @@ -1,11 +1,17 @@ -; RUN: opt < %s -instcombine -S | grep "icmp eq i32 %In, 15" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + ; PR2479 ; (See also PR1800.) define i1 @test(i32 %In) { - %c1 = icmp ugt i32 %In, 13 - %c2 = icmp eq i32 %In, 15 - %V = and i1 %c1, %c2 - ret i1 %V +; CHECK-LABEL: @test( +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[IN:%.*]], 15 +; CHECK-NEXT: ret i1 [[C2]] +; + %c1 = icmp ugt i32 %In, 13 + %c2 = icmp eq i32 %In, 15 + %V = and i1 %c1, %c2 + ret i1 %V } diff --git a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll index 91f1c0b0a986b2..9efc35fb2d20ba 100644 --- a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll +++ b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll @@ -1,23 +1,40 @@ -; RUN: opt < %s -instcombine -S | not grep or +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s ; PR2629 define void @f(i8* %x) nounwind { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[L1:%.*]] = load i8, i8* [[X:%.*]], align 1 +; CHECK-NEXT: [[S1:%.*]] = add i8 [[L1]], -6 +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i8 [[S1]], 2 +; CHECK-NEXT: [[S2:%.*]] = add i8 [[L1]], -10 +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i8 [[S2]], 2 +; CHECK-NEXT: [[A1:%.*]] = and i1 [[C1]], [[C2]] +; CHECK-NEXT: br i1 [[A1]], label [[INCOMPATIBLE:%.*]], label [[OKAY:%.*]] +; CHECK: okay: +; CHECK-NEXT: ret void +; CHECK: incompatible: +; CHECK-NEXT: ret void +; entry: - br label %bb + br label %bb bb: - %g1 = getelementptr i8, i8* %x, i32 0 - %l1 = load i8, i8* %g1, align 1 - %s1 = sub i8 %l1, 6 - %c1 = icmp ugt i8 %s1, 2 - %s2 = sub i8 %l1, 10 - %c2 = icmp ugt i8 %s2, 2 - %a1 = and i1 %c1, %c2 - br i1 %a1, label %incompatible, label %okay + %g1 = getelementptr i8, 
i8* %x, i32 0 + %l1 = load i8, i8* %g1, align 1 + %s1 = sub i8 %l1, 6 + %c1 = icmp ugt i8 %s1, 2 + %s2 = sub i8 %l1, 10 + %c2 = icmp ugt i8 %s2, 2 + %a1 = and i1 %c1, %c2 + br i1 %a1, label %incompatible, label %okay okay: - ret void + ret void incompatible: - ret void + ret void } diff --git a/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll b/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll index 82cf85fa4cd842..97956bc2e24975 100644 --- a/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll +++ b/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll @@ -1,15 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s + ; ; There should be no transformation. -; CHECK: %a = trunc i32 %x to i8 -; CHECK: %b = icmp ne i8 %a, 0 -; CHECK: %c = and i32 %x, 16711680 -; CHECK: %d = icmp ne i32 %c, 0 -; CHECK: %e = and i1 %b, %d -; CHECK: ret i1 %e - define i1 @f1(i32 %x) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[A:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[B:%.*]] = icmp ne i8 [[A]], 0 +; CHECK-NEXT: [[C:%.*]] = and i32 [[X]], 16711680 +; CHECK-NEXT: [[D:%.*]] = icmp ne i32 [[C]], 0 +; CHECK-NEXT: [[E:%.*]] = and i1 [[B]], [[D]] +; CHECK-NEXT: ret i1 [[E]] +; %a = trunc i32 %x to i8 %b = icmp ne i8 %a, 0 %c = and i32 %x, 16711680 diff --git a/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll b/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll index d1860bccd75ee8..8ef65a27c1f55e 100644 --- a/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll +++ b/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll @@ -1,13 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -instcombine | FileCheck %s ; Derived from gcc.c-torture/execute/frame-address.c -; CHECK-LABEL: @func( -; CHECK: return: -; CHECK-NOT: ret i32 0 -; CHECK: ret i32 %retval - define i32 @func(i8* %c, i8* %f) nounwind uwtable readnone noinline ssp { +; 
CHECK-LABEL: @func( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[D:%.*]] = alloca i8, align 1 +; CHECK-NEXT: store i8 0, i8* [[D]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[D]], [[C:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i8* [[D]], [[F:%.*]] +; CHECK-NEXT: [[NOT_CMP1:%.*]] = icmp uge i8* [[C]], [[F]] +; CHECK-NEXT: [[DOTCMP2:%.*]] = and i1 [[CMP2]], [[NOT_CMP1]] +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[CMP5:%.*]] = icmp uge i8* [[D]], [[F]] +; CHECK-NEXT: [[NOT_CMP3:%.*]] = icmp ule i8* [[C]], [[F]] +; CHECK-NEXT: [[DOTCMP5:%.*]] = and i1 [[CMP5]], [[NOT_CMP3]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0_IN:%.*]] = phi i1 [ [[DOTCMP2]], [[IF_THEN]] ], [ [[DOTCMP5]], [[IF_ELSE]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = zext i1 [[RETVAL_0_IN]] to i32 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; entry: %d = alloca i8, align 1 store i8 0, i8* %d, align 1 diff --git a/llvm/test/Transforms/InstCombine/range-check.ll b/llvm/test/Transforms/InstCombine/range-check.ll index 35f11dd39ef39e..ba77beae0f6864 100644 --- a/llvm/test/Transforms/InstCombine/range-check.ll +++ b/llvm/test/Transforms/InstCombine/range-check.ll @@ -1,12 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s ; Check simplification of ; (icmp sgt x, -1) & (icmp sgt/sge n, x) --> icmp ugt/uge n, x -; CHECK-LABEL: define i1 @test_and1 -; CHECK: [[R:%[0-9]+]] = icmp ugt i32 %nn, %x -; CHECK: ret i1 [[R]] define i1 @test_and1(i32 %x, i32 %n) { +; CHECK-LABEL: @test_and1( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; %nn = and i32 %n, 2147483647 %a = icmp sge i32 %x, 0 %b = icmp slt i32 %x, %nn @@ -14,10 +17,12 @@ define i1 @test_and1(i32 %x, i32 %n) { ret i1 %c } -; 
CHECK-LABEL: define i1 @test_and2 -; CHECK: [[R:%[0-9]+]] = icmp uge i32 %nn, %x -; CHECK: ret i1 [[R]] define i1 @test_and2(i32 %x, i32 %n) { +; CHECK-LABEL: @test_and2( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; %nn = and i32 %n, 2147483647 %a = icmp sgt i32 %x, -1 %b = icmp sle i32 %x, %nn @@ -25,10 +30,12 @@ define i1 @test_and2(i32 %x, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @test_and3 -; CHECK: [[R:%[0-9]+]] = icmp ugt i32 %nn, %x -; CHECK: ret i1 [[R]] define i1 @test_and3(i32 %x, i32 %n) { +; CHECK-LABEL: @test_and3( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; %nn = and i32 %n, 2147483647 %a = icmp sgt i32 %nn, %x %b = icmp sge i32 %x, 0 @@ -36,10 +43,12 @@ define i1 @test_and3(i32 %x, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @test_and4 -; CHECK: [[R:%[0-9]+]] = icmp uge i32 %nn, %x -; CHECK: ret i1 [[R]] define i1 @test_and4(i32 %x, i32 %n) { +; CHECK-LABEL: @test_and4( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; %nn = and i32 %n, 2147483647 %a = icmp sge i32 %nn, %x %b = icmp sge i32 %x, 0 @@ -47,10 +56,12 @@ define i1 @test_and4(i32 %x, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @test_or1 -; CHECK: [[R:%[0-9]+]] = icmp ule i32 %nn, %x -; CHECK: ret i1 [[R]] define i1 @test_or1(i32 %x, i32 %n) { +; CHECK-LABEL: @test_or1( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; %nn = and i32 %n, 2147483647 %a = icmp slt i32 %x, 0 %b = icmp sge i32 %x, %nn @@ -58,10 +69,12 @@ define i1 @test_or1(i32 %x, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @test_or2 -; CHECK: [[R:%[0-9]+]] = icmp ult i32 %nn, %x -; CHECK: ret 
i1 [[R]] define i1 @test_or2(i32 %x, i32 %n) { +; CHECK-LABEL: @test_or2( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; %nn = and i32 %n, 2147483647 %a = icmp sle i32 %x, -1 %b = icmp sgt i32 %x, %nn @@ -69,10 +82,12 @@ define i1 @test_or2(i32 %x, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @test_or3 -; CHECK: [[R:%[0-9]+]] = icmp ule i32 %nn, %x -; CHECK: ret i1 [[R]] define i1 @test_or3(i32 %x, i32 %n) { +; CHECK-LABEL: @test_or3( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; %nn = and i32 %n, 2147483647 %a = icmp sle i32 %nn, %x %b = icmp slt i32 %x, 0 @@ -80,10 +95,12 @@ define i1 @test_or3(i32 %x, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @test_or4 -; CHECK: [[R:%[0-9]+]] = icmp ult i32 %nn, %x -; CHECK: ret i1 [[R]] define i1 @test_or4(i32 %x, i32 %n) { +; CHECK-LABEL: @test_or4( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; %nn = and i32 %n, 2147483647 %a = icmp slt i32 %nn, %x %b = icmp slt i32 %x, 0 @@ -93,12 +110,14 @@ define i1 @test_or4(i32 %x, i32 %n) { ; Negative tests -; CHECK-LABEL: define i1 @negative1 -; CHECK: %a = icmp -; CHECK: %b = icmp -; CHECK: %c = and i1 %a, %b -; CHECK: ret i1 %c define i1 @negative1(i32 %x, i32 %n) { +; CHECK-LABEL: @negative1( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[A:%.*]] = icmp sgt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], 0 +; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; %nn = and i32 %n, 2147483647 %a = icmp slt i32 %x, %nn %b = icmp sgt i32 %x, 0 ; should be: icmp sge @@ -106,24 +125,27 @@ define i1 @negative1(i32 %x, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @negative2 -; CHECK: %a = icmp -; 
CHECK: %b = icmp -; CHECK: %c = and i1 %a, %b -; CHECK: ret i1 %c define i1 @negative2(i32 %x, i32 %n) { +; CHECK-LABEL: @negative2( +; CHECK-NEXT: [[A:%.*]] = icmp slt i32 [[X:%.*]], [[N:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], -1 +; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; %a = icmp slt i32 %x, %n ; n can be negative %b = icmp sge i32 %x, 0 %c = and i1 %a, %b ret i1 %c } -; CHECK-LABEL: define i1 @negative3 -; CHECK: %a = icmp -; CHECK: %b = icmp -; CHECK: %c = and i1 %a, %b -; CHECK: ret i1 %c define i1 @negative3(i32 %x, i32 %y, i32 %n) { +; CHECK-LABEL: @negative3( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[A:%.*]] = icmp sgt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[Y:%.*]], -1 +; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; %nn = and i32 %n, 2147483647 %a = icmp slt i32 %x, %nn %b = icmp sge i32 %y, 0 ; should compare %x and not %y @@ -131,12 +153,14 @@ define i1 @negative3(i32 %x, i32 %y, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @negative4 -; CHECK: %a = icmp -; CHECK: %b = icmp -; CHECK: %c = and i1 %a, %b -; CHECK: ret i1 %c define i1 @negative4(i32 %x, i32 %n) { +; CHECK-LABEL: @negative4( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], -1 +; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; %nn = and i32 %n, 2147483647 %a = icmp ne i32 %x, %nn ; should be: icmp slt/sle %b = icmp sge i32 %x, 0 @@ -144,12 +168,14 @@ define i1 @negative4(i32 %x, i32 %n) { ret i1 %c } -; CHECK-LABEL: define i1 @negative5 -; CHECK: %a = icmp -; CHECK: %b = icmp -; CHECK: %c = or i1 %a, %b -; CHECK: ret i1 %c define i1 @negative5(i32 %x, i32 %n) { +; CHECK-LABEL: @negative5( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[A:%.*]] = icmp sgt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: 
[[B:%.*]] = icmp sgt i32 [[X]], -1 +; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; %nn = and i32 %n, 2147483647 %a = icmp slt i32 %x, %nn %b = icmp sge i32 %x, 0 From 9f61fbd75ae1757d77988b37562de4d6583579aa Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 12 Jan 2021 12:32:24 -0800 Subject: [PATCH 64/86] [LV] Relax assumption that LCSSA implies single entry This relates to the ongoing effort to support vectorization of multiple exit loops (see D93317). The previous code assumed that LCSSA phis were always single entry before the vectorizer ran. This was correct, but only because the vectorizer allowed only a single exiting edge. There's nothing in the definition of LCSSA which requires single entry phis. A common case where this comes up is with a loop with multiple exiting blocks which all reach a common exit block. (e.g. see the test updates) Differential Revision: https://reviews.llvm.org/D93725 --- .../Vectorize/LoopVectorizationLegality.cpp | 20 +-- .../Transforms/Vectorize/LoopVectorize.cpp | 79 +++++----- .../first-order-recurrence-complex.ll | 114 ++++++++++++-- .../Transforms/LoopVectorize/loop-form.ll | 149 ++++++++++++++---- 4 files changed, 261 insertions(+), 101 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 3906b11ba4b994..e3e522958c3a40 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1101,8 +1101,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, // TODO: This restriction can be relaxed in the near future, it's here solely // to allow separation of changes for review. We need to generalize the phi // update logic in a number of places. 
- BasicBlock *ExitBB = Lp->getUniqueExitBlock(); - if (!ExitBB) { + if (!Lp->getUniqueExitBlock()) { reportVectorizationFailure("The loop must have a unique exit block", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop); @@ -1110,24 +1109,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, Result = false; else return false; - } else { - // The existing code assumes that LCSSA implies that phis are single entry - // (which was true when we had at most a single exiting edge from the latch). - // In general, there's nothing which prevents an LCSSA phi in exit block from - // having two or more values if there are multiple exiting edges leading to - // the exit block. (TODO: implement general case) - if (!llvm::empty(ExitBB->phis()) && !ExitBB->getSinglePredecessor()) { - reportVectorizationFailure("The loop must have no live-out values if " - "it has more than one exiting block", - "loop control flow is not understood by vectorizer", - "CFGNotUnderstood", ORE, TheLoop); - if (DoExtraAnalysis) - Result = false; - else - return false; - } } - return Result; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e6cadf8f8796e8..5ae400fb5dc9c0 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -633,10 +633,11 @@ class InnerLoopVectorizer { /// Clear NSW/NUW flags from reduction instructions if necessary. void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc); - /// The Loop exit block may have single value PHI nodes with some - /// incoming value. While vectorizing we only handled real values - /// that were defined inside the loop and we should have one value for - /// each predecessor of its parent basic block. See PR14725. + /// Fixup the LCSSA phi nodes in the unique exit block. 
This simply + /// means we need to add the appropriate incoming value from the middle + /// block as exiting edges from the scalar epilogue loop (if present) are + /// already in place, and we exit the vector loop exclusively to the middle + /// block. void fixLCSSAPHIs(); /// Iteratively sink the scalarized operands of a predicated instruction into @@ -4149,11 +4150,14 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // vector recurrence we extracted in the middle block. Since the loop is in // LCSSA form, we just need to find all the phi nodes for the original scalar // recurrence in the exit block, and then add an edge for the middle block. - for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { - if (LCSSAPhi.getIncomingValue(0) == Phi) { + // Note that LCSSA does not imply single entry when the original scalar loop + // had multiple exiting edges (as we always run the last iteration in the + // scalar epilogue); in that case, the exiting path through middle will be + // dynamically dead and the value picked for the phi doesn't matter. + for (PHINode &LCSSAPhi : LoopExitBlock->phis()) + if (any_of(LCSSAPhi.incoming_values(), + [Phi](Value *V) { return V == Phi; })) LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); - } - } } void InnerLoopVectorizer::fixReduction(PHINode *Phi) { @@ -4311,21 +4315,17 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // Now, we need to fix the users of the reduction variable // inside and outside of the scalar remainder loop. - // We know that the loop is in LCSSA form. We need to update the - // PHI nodes in the exit blocks. - for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { - // All PHINodes need to have a single entry edge, or two if - // we already fixed them. - assert(LCSSAPhi.getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); - // We found a reduction value exit-PHI. Update it with the - // incoming bypass edge. 
- if (LCSSAPhi.getIncomingValue(0) == LoopExitInst) + // We know that the loop is in LCSSA form. We need to update the PHI nodes + // in the exit blocks. See comment on analogous loop in + // fixFirstOrderRecurrence for a more complete explaination of the logic. + for (PHINode &LCSSAPhi : LoopExitBlock->phis()) + if (any_of(LCSSAPhi.incoming_values(), + [LoopExitInst](Value *V) { return V == LoopExitInst; })) LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock); - } // end of the LCSSA phi scan. - // Fix the scalar loop reduction variable with the incoming reduction sum - // from the vector body and from the backedge value. + // Fix the scalar loop reduction variable with the incoming reduction sum + // from the vector body and from the backedge value. int IncomingEdgeBlockIdx = Phi->getBasicBlockIndex(OrigLoop->getLoopLatch()); assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); @@ -4367,24 +4367,27 @@ void InnerLoopVectorizer::clearReductionWrapFlags( void InnerLoopVectorizer::fixLCSSAPHIs() { for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { - if (LCSSAPhi.getNumIncomingValues() == 1) { - auto *IncomingValue = LCSSAPhi.getIncomingValue(0); - // Non-instruction incoming values will have only one value. - unsigned LastLane = 0; - if (isa(IncomingValue)) - LastLane = Cost->isUniformAfterVectorization( - cast(IncomingValue), VF) - ? 0 - : VF.getKnownMinValue() - 1; - assert((!VF.isScalable() || LastLane == 0) && - "scalable vectors dont support non-uniform scalars yet"); - // Can be a loop invariant incoming value or the last scalar value to be - // extracted from the vectorized loop. 
- Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); - Value *lastIncomingValue = - getOrCreateScalarValue(IncomingValue, { UF - 1, LastLane }); - LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock); - } + if (LCSSAPhi.getBasicBlockIndex(LoopMiddleBlock) != -1) + // Some phis were already hand updated by the reduction and recurrence + // code above, leave them alone. + continue; + + auto *IncomingValue = LCSSAPhi.getIncomingValue(0); + // Non-instruction incoming values will have only one value. + unsigned LastLane = 0; + if (isa(IncomingValue)) + LastLane = Cost->isUniformAfterVectorization( + cast(IncomingValue), VF) + ? 0 + : VF.getKnownMinValue() - 1; + assert((!VF.isScalable() || LastLane == 0) && + "scalable vectors dont support non-uniform scalars yet"); + // Can be a loop invariant incoming value or the last scalar value to be + // extracted from the vectorized loop. + Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); + Value *lastIncomingValue = + getOrCreateScalarValue(IncomingValue, { UF - 1, LastLane }); + LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock); } } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index ef3d3e659e5a4b..7d4a3c5c9935a9 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -437,22 +437,63 @@ exit: define i16 @multiple_exit(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_exit( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 4 +; CHECK-NEXT: br i1 
[[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 4, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[TMP13]], <4 x i16>* [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> 
[[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[REC:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[REC_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] ; CHECK-NEXT: [[REC_NEXT]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] ; CHECK: for.body: -; CHECK-NEXT: store i16 [[REC]], i16* [[B]], align 4 +; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]] ; CHECK: if.end: -; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[REC]], [[FOR_BODY]] ], [ [[REC]], [[FOR_COND]] ] +; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ 
[[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[REC_LCSSA]] ; entry: @@ -483,22 +524,63 @@ if.end: define i16 @multiple_exit2(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_exit2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 4, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2 +; 
CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[TMP13]], <4 x i16>* [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[REC:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[REC_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] ; CHECK-NEXT: [[REC_NEXT]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label 
[[IF_END:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] ; CHECK: for.body: -; CHECK-NEXT: store i16 [[REC]], i16* [[B]], align 4 +; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]] ; CHECK: if.end: -; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[REC]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ] +; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[REC_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index bf94505aec2c3f..5b2dd81a395b53 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -346,20 +346,54 @@ if.end: define i32 @multiple_unique_exit2(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_unique_exit2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: 
br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[N_VEC]], 1 +; CHECK-NEXT: [[IND_ESCAPE1:%.*]] = sub i32 [[N_VEC]], 1 +; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] ; CHECK: for.body: ; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 
-; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]] ; CHECK: if.end: -; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] +; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ], [ [[IND_ESCAPE1]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[I_LCSSA]] ; ; TAILFOLD-LABEL: @multiple_unique_exit2( @@ -404,20 +438,52 @@ if.end: define i32 @multiple_unique_exit3(i16* %p, i32 %n) { ; CHECK-LABEL: @multiple_unique_exit3( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 +; 
CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] ; CHECK: for.body: ; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], 
label [[IF_END]], [[LOOP11:!llvm.loop !.*]] ; CHECK: if.end: -; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] +; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ], [ 0, [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[EXIT]] ; ; TAILFOLD-LABEL: @multiple_unique_exit3( @@ -798,7 +864,7 @@ define void @scalar_predication(float* %addr) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 201, 200 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -819,7 +885,7 @@ define void @scalar_predication(float* %addr) { ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: br label [[LOOP_HEADER]], [[LOOP9:!llvm.loop !.*]] +; CHECK-NEXT: br label [[LOOP_HEADER]], [[LOOP13:!llvm.loop !.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -870,27 +936,54 @@ exit: ret void } -define i32 @reduction(i32* %addr) { -; CHECK-LABEL: @reduction( +define i32 @me_reduction(i32* %addr) { +; CHECK-LABEL: @me_reduction( ; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add 
i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP5]], [[RDX_SHUF]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[BIN_RDX]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 201, 200 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]] ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq 
i64 [[IV]], 200 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4 +; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 -; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], [[LOOP15:!llvm.loop !.*]] ; CHECK: exit: -; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] +; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[LCSSA]] ; -; TAILFOLD-LABEL: @reduction( +; TAILFOLD-LABEL: @me_reduction( ; TAILFOLD-NEXT: entry: ; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] ; TAILFOLD: loop.header: @@ -935,8 +1028,8 @@ exit: ; this. There's an analogous single exit case where we extract the N-1 ; value of the reduction that we can also handle. If we fix the later, the ; multiple exit case probably falls out. 
-define i32 @reduction2(i32* %addr) { -; CHECK-LABEL: @reduction2( +define i32 @me_reduction2(i32* %addr) { +; CHECK-LABEL: @me_reduction2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: @@ -954,7 +1047,7 @@ define i32 @reduction2(i32* %addr) { ; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] ; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]] ; -; TAILFOLD-LABEL: @reduction2( +; TAILFOLD-LABEL: @me_reduction2( ; TAILFOLD-NEXT: entry: ; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] ; TAILFOLD: loop.header: From f706486eaf07020b11f2088274c757e4070fe6d1 Mon Sep 17 00:00:00 2001 From: Sunil Srivastava Date: Tue, 12 Jan 2021 12:37:18 -0800 Subject: [PATCH 65/86] Fix for crash in __builtin_return_address in template context. The check for argument value needs to be guarded by !isValueDependent(). Differential Revision: https://reviews.llvm.org/D94438 --- clang/lib/Sema/SemaChecking.cpp | 3 ++- clang/test/Sema/builtin-returnaddress.c | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 15b5934224f0f1..2d3d36f4adad0d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1943,7 +1943,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, // -Wframe-address warning if non-zero passed to builtin // return/frame address. 
Expr::EvalResult Result; - if (TheCall->getArg(0)->EvaluateAsInt(Result, getASTContext()) && + if (!TheCall->getArg(0)->isValueDependent() && + TheCall->getArg(0)->EvaluateAsInt(Result, getASTContext()) && Result.Val.getInt() != 0) Diag(TheCall->getBeginLoc(), diag::warn_frame_address) << ((BuiltinID == Builtin::BI__builtin_return_address) diff --git a/clang/test/Sema/builtin-returnaddress.c b/clang/test/Sema/builtin-returnaddress.c index 3ebbdc6048d8b5..16d2a517ac12f2 100644 --- a/clang/test/Sema/builtin-returnaddress.c +++ b/clang/test/Sema/builtin-returnaddress.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -fsyntax-only -Wframe-address -verify %s // RUN: %clang_cc1 -fsyntax-only -Wmost -verify %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wframe-address -verify %s void* a(unsigned x) { return __builtin_return_address(0); @@ -17,3 +18,14 @@ void* d(unsigned x) { return __builtin_frame_address(1); // expected-warning{{calling '__builtin_frame_address' with a nonzero argument is unsafe}} } +#ifdef __cplusplus +template void *RA() +{ + return __builtin_return_address(N); // expected-warning{{calling '__builtin_return_address' with a nonzero argument is unsafe}} +} + +void *foo() +{ + return RA<2>(); // expected-note{{in instantiation of function template specialization 'RA<2>' requested here}} +} +#endif From fb063c933f0062db7fee622f7a43a6a5e560672d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 12 Jan 2021 20:54:23 +0100 Subject: [PATCH 66/86] [InstCombine] Duplicate tests for logical and/or (NFC) This replicates existing and/or tests to also test variants using select. This should help us get a more accurate view on which optimizations we're missing if we disable the select -> and/or fold. 
--- .../InstCombine/2006-12-15-Range-Test.ll | 37 + .../InstCombine/2007-03-13-CompareMerge.ll | 11 + .../InstCombine/2007-05-10-icmp-or.ll | 11 + .../InstCombine/2007-11-15-CompareMiscomp.ll | 11 + .../InstCombine/2008-01-13-AndCmpCmp.ll | 13 + .../InstCombine/2008-02-28-OrFCmpCrash.ll | 25 + .../InstCombine/2008-06-21-CompareMiscomp.ll | 11 + .../Transforms/InstCombine/2008-08-05-And.ll | 37 + .../Transforms/InstCombine/2012-02-28-ICmp.ll | 17 + .../InstCombine/2012-03-10-InstCombine.ll | 47 + llvm/test/Transforms/InstCombine/and-fcmp.ll | 1523 ++++++++++++++++- .../InstCombine/and-or-icmp-min-max.ll | 1024 ++++++++++- .../InstCombine/and-or-icmp-nullptr.ll | 334 ++++ .../Transforms/InstCombine/and-or-icmps.ll | 448 +++++ llvm/test/Transforms/InstCombine/and.ll | 157 ++ llvm/test/Transforms/InstCombine/and2.ll | 35 + llvm/test/Transforms/InstCombine/assume.ll | 104 ++ .../test/Transforms/InstCombine/bit-checks.ll | 843 +++++++-- ...th-select-of-constant-threshold-pattern.ll | 80 +- llvm/test/Transforms/InstCombine/demorgan.ll | 16 + .../InstCombine/dont-distribute-phi.ll | 54 +- .../InstCombine/fold-bin-operand.ll | 8 + llvm/test/Transforms/InstCombine/freeze.ll | 12 + .../Transforms/InstCombine/icmp-custom-dl.ll | 18 + .../Transforms/InstCombine/icmp-logical.ll | 809 ++++++++- llvm/test/Transforms/InstCombine/icmp.ll | 92 + llvm/test/Transforms/InstCombine/ispow2.ll | 288 +++- .../logical-select-inseltpoison.ll | 42 + .../Transforms/InstCombine/logical-select.ll | 42 + .../test/Transforms/InstCombine/merge-icmp.ll | 49 +- .../InstCombine/objsize-noverify.ll | 35 + .../Transforms/InstCombine/onehot_merge.ll | 472 +++++ llvm/test/Transforms/InstCombine/or-fcmp.ll | 1522 +++++++++++++++- llvm/test/Transforms/InstCombine/or.ll | 265 +++ .../InstCombine/prevent-cmp-merge.ll | 57 +- .../Transforms/InstCombine/range-check.ll | 177 ++ ...f-negative-is-non-zero-and-no-underflow.ll | 238 +++ ...ve-or-zero-is-non-zero-and-no-underflow.ll | 155 ++ 
...ult-of-usub-is-non-zero-and-no-overflow.ll | 411 +++++ .../InstCombine/select-crash-noverify.ll | 16 + .../InstCombine/select-ctlz-to-cttz.ll | 57 +- .../InstCombine/select-imm-canon.ll | 35 +- llvm/test/Transforms/InstCombine/select.ll | 29 +- llvm/test/Transforms/InstCombine/set.ll | 85 +- .../InstCombine/sign-test-and-or.ll | 243 ++- .../InstCombine/signed-truncation-check.ll | 416 +++++ .../Transforms/InstCombine/umul-sign-check.ll | 169 ++ .../usub-overflow-known-by-implied-cond.ll | 138 +- .../InstCombine/widenable-conditions.ll | 143 ++ .../Transforms/InstCombine/zext-or-icmp.ll | 50 + 50 files changed, 10673 insertions(+), 238 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll index 38f6523bec397f..3d08ae5e4012a7 100644 --- a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll +++ b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll @@ -42,3 +42,40 @@ cond_true: ; preds = %newFuncRoot br i1 %bothcond, label %bb27.exitStub, label %cond_next23.exitStub } +define i1 @print_pgm_cond_true_logical(i32 %tmp12.reload, i32* %tmp16.out) { +; CHECK-LABEL: @print_pgm_cond_true_logical( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[COND_TRUE:%.*]] +; CHECK: bb27.exitStub: +; CHECK-NEXT: store i32 [[TMP16:%.*]], i32* [[TMP16_OUT:%.*]], align 4 +; CHECK-NEXT: ret i1 true +; CHECK: cond_next23.exitStub: +; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP16_OUT]], align 4 +; CHECK-NEXT: ret i1 false +; CHECK: cond_true: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 [[TMP12_RELOAD:%.*]] +; CHECK-NEXT: [[TMP16]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16_OFF:%.*]] = add i32 [[TMP16]], 31 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[TMP16_OFF]], 62 +; CHECK-NEXT: br i1 [[TMP0]], label [[BB27_EXITSTUB:%.*]], label [[COND_NEXT23_EXITSTUB:%.*]] +; +newFuncRoot: + br label %cond_true + +bb27.exitStub: ; preds = 
%cond_true + store i32 %tmp16, i32* %tmp16.out + ret i1 true + +cond_next23.exitStub: ; preds = %cond_true + store i32 %tmp16, i32* %tmp16.out + ret i1 false + +cond_true: ; preds = %newFuncRoot + %tmp15 = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 %tmp12.reload ; [#uses=1] + %tmp16 = load i32, i32* %tmp15 ; [#uses=4] + %tmp18 = icmp slt i32 %tmp16, -31 ; [#uses=1] + %tmp21 = icmp sgt i32 %tmp16, 31 ; [#uses=1] + %bothcond = select i1 %tmp18, i1 true, i1 %tmp21 ; [#uses=1] + br i1 %bothcond, label %bb27.exitStub, label %cond_next23.exitStub +} + diff --git a/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll b/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll index 6db886b25ede40..7cedb1c5ced23e 100644 --- a/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll +++ b/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll @@ -13,3 +13,14 @@ define i1 @test(i32 %c.3.i, i32 %d.292.2.i) { %sel_tmp80 = or i1 %tmp266.i, %tmp276.i ret i1 %sel_tmp80 } + +define i1 @test_logical(i32 %c.3.i, i32 %d.292.2.i) { +; CHECK-LABEL: @test_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sle i32 [[C_3_I:%.*]], [[D_292_2_I:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %tmp266.i = icmp slt i32 %c.3.i, %d.292.2.i + %tmp276.i = icmp eq i32 %c.3.i, %d.292.2.i + %sel_tmp80 = select i1 %tmp266.i, i1 true, i1 %tmp276.i + ret i1 %sel_tmp80 +} diff --git a/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll b/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll index fb9b9a6cb5af73..f019507082b3bd 100644 --- a/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll +++ b/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll @@ -12,3 +12,14 @@ define i1 @test(i32 %tmp9) { ret i1 %bothcond } +define i1 @test_logical(i32 %tmp9) { +; CHECK-LABEL: @test_logical( +; CHECK-NEXT: [[TMP20:%.*]] = icmp ugt i32 [[TMP9:%.*]], 255 +; CHECK-NEXT: ret i1 [[TMP20]] +; + %tmp20 = icmp ugt i32 %tmp9, 255 ; [#uses=1] + %tmp11.not = icmp sgt i32 %tmp9, 255 ; [#uses=1] + 
%bothcond = select i1 %tmp20, i1 true, i1 %tmp11.not ; [#uses=1] + ret i1 %bothcond +} + diff --git a/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll b/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll index f872b6a98e096b..8efa821d7de024 100644 --- a/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll +++ b/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll @@ -14,3 +14,14 @@ define i1 @test(i32 %In) { ret i1 %V } +define i1 @test_logical(i32 %In) { +; CHECK-LABEL: @test_logical( +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[IN:%.*]], 1 +; CHECK-NEXT: ret i1 [[C2]] +; + %c1 = icmp sgt i32 %In, -1 + %c2 = icmp eq i32 %In, 1 + %V = select i1 %c1, i1 %c2, i1 false + ret i1 %V +} + diff --git a/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll b/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll index eb3329b4dd02bf..6dde4402e1fb60 100644 --- a/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll +++ b/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll @@ -15,3 +15,16 @@ define i1 @test(i32 %c84.17) { %tmp2703 = and i1 %tmp2696, %tmp2699 ; [#uses=1] ret i1 %tmp2703 } + +define i1 @test_logical(i32 %c84.17) { +; CHECK-LABEL: @test_logical( +; CHECK-NEXT: [[TMP2696:%.*]] = icmp ne i32 [[C84_17:%.*]], 34 +; CHECK-NEXT: [[TMP2699:%.*]] = icmp sgt i32 [[C84_17]], -1 +; CHECK-NEXT: [[TMP2703:%.*]] = and i1 [[TMP2696]], [[TMP2699]] +; CHECK-NEXT: ret i1 [[TMP2703]] +; + %tmp2696 = icmp ne i32 %c84.17, 34 + %tmp2699 = icmp sgt i32 %c84.17, -1 + %tmp2703 = select i1 %tmp2696, i1 %tmp2699, i1 false + ret i1 %tmp2703 +} diff --git a/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll b/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll index 7b08a7b3a02577..50657d744da1d8 100644 --- a/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll +++ b/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll @@ -27,3 +27,28 @@ bb74: ; preds = %entry bb80: ; preds = %entry ret float 0.000000e+00 } + 
+define float @test_logical(float %x, x86_fp80 %y) nounwind readonly { +; CHECK-LABEL: @test_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP67:%.*]] = fcmp uno x86_fp80 [[Y:%.*]], 0xK00000000000000000000 +; CHECK-NEXT: [[TMP71:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: [[BOTHCOND:%.*]] = or i1 [[TMP67]], [[TMP71]] +; CHECK-NEXT: br i1 [[BOTHCOND]], label [[BB74:%.*]], label [[BB80:%.*]] +; CHECK: bb74: +; CHECK-NEXT: ret float 0.000000e+00 +; CHECK: bb80: +; CHECK-NEXT: ret float 0.000000e+00 +; +entry: + %tmp67 = fcmp uno x86_fp80 %y, 0xK00000000000000000000 ; [#uses=1] + %tmp71 = fcmp uno float %x, 0.000000e+00 ; [#uses=1] + %bothcond = select i1 %tmp67, i1 true, i1 %tmp71 ; [#uses=1] + br i1 %bothcond, label %bb74, label %bb80 + +bb74: ; preds = %entry + ret float 0.000000e+00 + +bb80: ; preds = %entry + ret float 0.000000e+00 +} diff --git a/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll b/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll index 11226bcf41855b..2b0f364ee375b4 100644 --- a/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll +++ b/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll @@ -15,3 +15,14 @@ define i1 @test(i32 %In) { ret i1 %V } +define i1 @test_logical(i32 %In) { +; CHECK-LABEL: @test_logical( +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[IN:%.*]], 15 +; CHECK-NEXT: ret i1 [[C2]] +; + %c1 = icmp ugt i32 %In, 13 + %c2 = icmp eq i32 %In, 15 + %V = select i1 %c1, i1 %c2, i1 false + ret i1 %V +} + diff --git a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll index 9efc35fb2d20ba..bec055a2ee7cf1 100644 --- a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll +++ b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll @@ -38,3 +38,40 @@ okay: incompatible: ret void } + +define void @f_logical(i8* %x) nounwind { +; CHECK-LABEL: @f_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB:%.*]] +; CHECK: bb: +; CHECK-NEXT: 
[[L1:%.*]] = load i8, i8* [[X:%.*]], align 1 +; CHECK-NEXT: [[S1:%.*]] = add i8 [[L1]], -6 +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i8 [[S1]], 2 +; CHECK-NEXT: [[S2:%.*]] = add i8 [[L1]], -10 +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i8 [[S2]], 2 +; CHECK-NEXT: [[A1:%.*]] = and i1 [[C1]], [[C2]] +; CHECK-NEXT: br i1 [[A1]], label [[INCOMPATIBLE:%.*]], label [[OKAY:%.*]] +; CHECK: okay: +; CHECK-NEXT: ret void +; CHECK: incompatible: +; CHECK-NEXT: ret void +; +entry: + br label %bb + +bb: + %g1 = getelementptr i8, i8* %x, i32 0 + %l1 = load i8, i8* %g1, align 1 + %s1 = sub i8 %l1, 6 + %c1 = icmp ugt i8 %s1, 2 + %s2 = sub i8 %l1, 10 + %c2 = icmp ugt i8 %s2, 2 + %a1 = select i1 %c1, i1 %c2, i1 false + br i1 %a1, label %incompatible, label %okay + +okay: + ret void + +incompatible: + ret void +} diff --git a/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll b/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll index 97956bc2e24975..85792304c50d96 100644 --- a/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll +++ b/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll @@ -20,3 +20,20 @@ define i1 @f1(i32 %x) { %e = and i1 %b, %d ret i1 %e } + +define i1 @f1_logical(i32 %x) { +; CHECK-LABEL: @f1_logical( +; CHECK-NEXT: [[A:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[B:%.*]] = icmp ne i8 [[A]], 0 +; CHECK-NEXT: [[C:%.*]] = and i32 [[X]], 16711680 +; CHECK-NEXT: [[D:%.*]] = icmp ne i32 [[C]], 0 +; CHECK-NEXT: [[E:%.*]] = and i1 [[B]], [[D]] +; CHECK-NEXT: ret i1 [[E]] +; + %a = trunc i32 %x to i8 + %b = icmp ne i8 %a, 0 + %c = and i32 %x, 16711680 + %d = icmp ne i32 %c, 0 + %e = select i1 %b, i1 %d, i1 false + ret i1 %e +} diff --git a/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll b/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll index 8ef65a27c1f55e..d180560bfbcc3a 100644 --- a/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll +++ b/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll @@ -50,3 +50,50 @@ return: ; preds = %if.else, 
%if.then ret i32 %retval.0 } +define i32 @func_logical(i8* %c, i8* %f) nounwind uwtable readnone noinline ssp { +; CHECK-LABEL: @func_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[D:%.*]] = alloca i8, align 1 +; CHECK-NEXT: store i8 0, i8* [[D]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[D]], [[C:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i8* [[D]], [[F:%.*]] +; CHECK-NEXT: [[NOT_CMP1:%.*]] = icmp uge i8* [[C]], [[F]] +; CHECK-NEXT: [[DOTCMP2:%.*]] = and i1 [[CMP2]], [[NOT_CMP1]] +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[CMP5:%.*]] = icmp uge i8* [[D]], [[F]] +; CHECK-NEXT: [[NOT_CMP3:%.*]] = icmp ule i8* [[C]], [[F]] +; CHECK-NEXT: [[DOTCMP5:%.*]] = and i1 [[CMP5]], [[NOT_CMP3]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0_IN:%.*]] = phi i1 [ [[DOTCMP2]], [[IF_THEN]] ], [ [[DOTCMP5]], [[IF_ELSE]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = zext i1 [[RETVAL_0_IN]] to i32 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %d = alloca i8, align 1 + store i8 0, i8* %d, align 1 + %cmp = icmp ugt i8* %d, %c + br i1 %cmp, label %if.else, label %if.then + +if.then: ; preds = %entry + %cmp2 = icmp ule i8* %d, %f + %not.cmp1 = icmp uge i8* %c, %f + %.cmp2 = select i1 %cmp2, i1 %not.cmp1, i1 false + %land.ext = zext i1 %.cmp2 to i32 + br label %return + +if.else: ; preds = %entry + %cmp5 = icmp uge i8* %d, %f + %not.cmp3 = icmp ule i8* %c, %f + %.cmp5 = select i1 %cmp5, i1 %not.cmp3, i1 false + %land.ext7 = zext i1 %.cmp5 to i32 + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %land.ext, %if.then ], [ %land.ext7, %if.else ] + ret i32 %retval.0 +} + diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll index dd51c6548eea64..18689c969bd04d 100644 --- a/llvm/test/Transforms/InstCombine/and-fcmp.ll +++ 
b/llvm/test/Transforms/InstCombine/and-fcmp.ll @@ -12,6 +12,17 @@ define i1 @PR1738(double %x, double %y) { ret i1 %and } +define i1 @PR1738_logical(double %x, double %y) { +; CHECK-LABEL: @PR1738_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp1 = fcmp ord double %x, 0.0 + %cmp2 = fcmp ord double %y, 0.0 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + define <2 x i1> @PR1738_vec_undef(<2 x double> %x, <2 x double> %y) { ; CHECK-LABEL: @PR1738_vec_undef( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <2 x double> [[X:%.*]], [[Y:%.*]] @@ -36,6 +47,19 @@ define i1 @PR41069(i1 %z, float %c, float %d) { ret i1 %r } +define i1 @PR41069_logical(i1 %z, float %c, float %d) { +; CHECK-LABEL: @PR41069_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord float [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret i1 [[R]] +; + %ord1 = fcmp arcp ord float %c, 0.0 + %and = select i1 %ord1, i1 %z, i1 false + %ord2 = fcmp afn ord float %d, 0.0 + %r = select i1 %and, i1 %ord2, i1 false + ret i1 %r +} + define i1 @PR41069_commute(i1 %z, float %c, float %d) { ; CHECK-LABEL: @PR41069_commute( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ninf ord float [[D:%.*]], [[C:%.*]] @@ -49,6 +73,19 @@ define i1 @PR41069_commute(i1 %z, float %c, float %d) { ret i1 %r } +define i1 @PR41069_commute_logical(i1 %z, float %c, float %d) { +; CHECK-LABEL: @PR41069_commute_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ninf ord float [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret i1 [[R]] +; + %ord1 = fcmp ninf ord float %c, 0.0 + %and = select i1 %ord1, i1 %z, i1 false + %ord2 = fcmp ninf reassoc ord float %d, 0.0 + %r = select i1 %ord2, i1 %and, i1 false + ret i1 %r +} + ; Commute differently and make sure vectors work. 
define <2 x i1> @PR41069_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) { @@ -94,6 +131,19 @@ define i1 @PR15737(float %a, double %b) { ret i1 %and } +define i1 @PR15737_logical(float %a, double %b) { +; CHECK-LABEL: @PR15737_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[A:%.*]], 0.000000e+00 +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord double [[B:%.*]], 0.000000e+00 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: ret i1 [[AND]] +; + %cmp = fcmp ord float %a, 0.000000e+00 + %cmp1 = fcmp ord double %b, 0.000000e+00 + %and = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %and +} + define <2 x i1> @t9(<2 x float> %a, <2 x double> %b) { ; CHECK-LABEL: @t9( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ord <2 x float> [[A:%.*]], zeroinitializer @@ -118,6 +168,17 @@ define i1 @fcmp_ord_nonzero(float %x, float %y) { ret i1 %and } +define i1 @fcmp_ord_nonzero_logical(float %x, float %y) { +; CHECK-LABEL: @fcmp_ord_nonzero_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp1 = fcmp ord float %x, 1.0 + %cmp2 = fcmp ord float %y, 2.0 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + define <3 x i1> @fcmp_ord_nonzero_vec(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: @fcmp_ord_nonzero_vec( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <3 x float> [[X:%.*]], [[Y:%.*]] @@ -139,6 +200,16 @@ define i1 @auto_gen_0(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_0_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_0_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp false double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_1(double %a, double %b) { ; CHECK-LABEL: @auto_gen_1( ; CHECK-NEXT: ret i1 false @@ -149,6 +220,16 @@ define i1 @auto_gen_1(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_1_logical(double %a, double %b) { +; CHECK-LABEL: 
@auto_gen_1_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp oeq double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_2(double %a, double %b) { ; CHECK-LABEL: @auto_gen_2( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -160,6 +241,17 @@ define i1 @auto_gen_2(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_2_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp oeq double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_3(double %a, double %b) { ; CHECK-LABEL: @auto_gen_3( ; CHECK-NEXT: ret i1 false @@ -170,6 +262,16 @@ define i1 @auto_gen_3(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_3_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_3_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ogt double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_4(double %a, double %b) { ; CHECK-LABEL: @auto_gen_4( ; CHECK-NEXT: ret i1 false @@ -180,6 +282,16 @@ define i1 @auto_gen_4(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_4_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_4_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ogt double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_5(double %a, double %b) { ; CHECK-LABEL: @auto_gen_5( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -191,6 +303,17 @@ define i1 @auto_gen_5(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_5_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_5_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], 
[[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ogt double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_6(double %a, double %b) { ; CHECK-LABEL: @auto_gen_6( ; CHECK-NEXT: ret i1 false @@ -201,6 +324,16 @@ define i1 @auto_gen_6(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_6_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_6_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp oge double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_7(double %a, double %b) { ; CHECK-LABEL: @auto_gen_7( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -212,6 +345,17 @@ define i1 @auto_gen_7(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_7_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_7_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp oge double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_8(double %a, double %b) { ; CHECK-LABEL: @auto_gen_8( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -223,6 +367,17 @@ define i1 @auto_gen_8(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_8_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_8_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp oge double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_9(double %a, double %b) { ; CHECK-LABEL: @auto_gen_9( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] @@ -234,6 +389,17 @@ define i1 @auto_gen_9(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_9_logical(double %a, double 
%b) { +; CHECK-LABEL: @auto_gen_9_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp oge double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_10(double %a, double %b) { ; CHECK-LABEL: @auto_gen_10( ; CHECK-NEXT: ret i1 false @@ -244,6 +410,16 @@ define i1 @auto_gen_10(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_10_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_10_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_11(double %a, double %b) { ; CHECK-LABEL: @auto_gen_11( ; CHECK-NEXT: ret i1 false @@ -254,6 +430,16 @@ define i1 @auto_gen_11(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_11_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_11_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_12(double %a, double %b) { ; CHECK-LABEL: @auto_gen_12( ; CHECK-NEXT: ret i1 false @@ -264,6 +450,16 @@ define i1 @auto_gen_12(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_12_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_12_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_13(double %a, double %b) { ; CHECK-LABEL: @auto_gen_13( ; CHECK-NEXT: ret i1 false @@ -274,6 +470,16 @@ define i1 @auto_gen_13(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_13_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_13_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp oge double %a, %b 
+ %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_14(double %a, double %b) { ; CHECK-LABEL: @auto_gen_14( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -285,6 +491,17 @@ define i1 @auto_gen_14(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_14_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_14_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_15(double %a, double %b) { ; CHECK-LABEL: @auto_gen_15( ; CHECK-NEXT: ret i1 false @@ -295,6 +512,16 @@ define i1 @auto_gen_15(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_15_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_15_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_16(double %a, double %b) { ; CHECK-LABEL: @auto_gen_16( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -306,6 +533,17 @@ define i1 @auto_gen_16(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_16_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_16_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_17(double %a, double %b) { ; CHECK-LABEL: @auto_gen_17( ; CHECK-NEXT: ret i1 false @@ -316,6 +554,16 @@ define i1 @auto_gen_17(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_17_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_17_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp ogt 
double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_18(double %a, double %b) { ; CHECK-LABEL: @auto_gen_18( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -327,6 +575,17 @@ define i1 @auto_gen_18(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_18_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_18_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_19(double %a, double %b) { ; CHECK-LABEL: @auto_gen_19( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -338,6 +597,17 @@ define i1 @auto_gen_19(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_19_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_19_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_20(double %a, double %b) { ; CHECK-LABEL: @auto_gen_20( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -349,6 +619,17 @@ define i1 @auto_gen_20(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_20_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_20_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_21(double %a, double %b) { ; CHECK-LABEL: @auto_gen_21( ; CHECK-NEXT: ret i1 false @@ -359,6 +640,16 @@ define i1 @auto_gen_21(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_21_logical(double %a, double %b) { +; 
CHECK-LABEL: @auto_gen_21_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_22(double %a, double %b) { ; CHECK-LABEL: @auto_gen_22( ; CHECK-NEXT: ret i1 false @@ -369,6 +660,16 @@ define i1 @auto_gen_22(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_22_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_22_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_23(double %a, double %b) { ; CHECK-LABEL: @auto_gen_23( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -380,6 +681,17 @@ define i1 @auto_gen_23(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_23_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_23_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_24(double %a, double %b) { ; CHECK-LABEL: @auto_gen_24( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -391,6 +703,17 @@ define i1 @auto_gen_24(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_24_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_24_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_25(double %a, double %b) { ; CHECK-LABEL: @auto_gen_25( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -402,6 +725,17 @@ define i1 @auto_gen_25(double %a, double %b) { ret i1 %retval } +define i1 
@auto_gen_25_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_25_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_26(double %a, double %b) { ; CHECK-LABEL: @auto_gen_26( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -413,6 +747,17 @@ define i1 @auto_gen_26(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_26_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_26_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_27(double %a, double %b) { ; CHECK-LABEL: @auto_gen_27( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -424,6 +769,17 @@ define i1 @auto_gen_27(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_27_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_27_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_28(double %a, double %b) { ; CHECK-LABEL: @auto_gen_28( ; CHECK-NEXT: ret i1 false @@ -434,7 +790,17 @@ define i1 @auto_gen_28(double %a, double %b) { ret i1 %retval } -define i1 @auto_gen_29(double %a, double %b) { +define i1 @auto_gen_28_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_28_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + +define i1 @auto_gen_29(double %a, double %b) { ; CHECK-LABEL: 
@auto_gen_29( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[TMP1]] @@ -445,6 +811,17 @@ define i1 @auto_gen_29(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_29_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_29_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_30(double %a, double %b) { ; CHECK-LABEL: @auto_gen_30( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -456,6 +833,17 @@ define i1 @auto_gen_30(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_30_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_30_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_31(double %a, double %b) { ; CHECK-LABEL: @auto_gen_31( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] @@ -467,6 +855,17 @@ define i1 @auto_gen_31(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_31_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_31_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_32(double %a, double %b) { ; CHECK-LABEL: @auto_gen_32( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -478,6 +877,17 @@ define i1 @auto_gen_32(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_32_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_32_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt 
double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_33(double %a, double %b) { ; CHECK-LABEL: @auto_gen_33( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -489,6 +899,17 @@ define i1 @auto_gen_33(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_33_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_33_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_34(double %a, double %b) { ; CHECK-LABEL: @auto_gen_34( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -500,6 +921,17 @@ define i1 @auto_gen_34(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_34_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_34_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_35(double %a, double %b) { ; CHECK-LABEL: @auto_gen_35( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -511,6 +943,17 @@ define i1 @auto_gen_35(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_35_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_35_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_36(double %a, double %b) { ; CHECK-LABEL: @auto_gen_36( ; CHECK-NEXT: ret i1 false @@ -521,6 +964,16 @@ define i1 
@auto_gen_36(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_36_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_36_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_37(double %a, double %b) { ; CHECK-LABEL: @auto_gen_37( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -532,6 +985,17 @@ define i1 @auto_gen_37(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_37_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_37_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_38(double %a, double %b) { ; CHECK-LABEL: @auto_gen_38( ; CHECK-NEXT: ret i1 false @@ -542,6 +1006,16 @@ define i1 @auto_gen_38(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_38_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_38_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_39(double %a, double %b) { ; CHECK-LABEL: @auto_gen_39( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -553,6 +1027,17 @@ define i1 @auto_gen_39(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_39_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_39_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_40(double %a, double %b) { ; CHECK-LABEL: @auto_gen_40( ; CHECK-NEXT: ret i1 false @@ -563,6 +1048,16 
@@ define i1 @auto_gen_40(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_40_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_40_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_41(double %a, double %b) { ; CHECK-LABEL: @auto_gen_41( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -574,6 +1069,17 @@ define i1 @auto_gen_41(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_41_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_41_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_42(double %a, double %b) { ; CHECK-LABEL: @auto_gen_42( ; CHECK-NEXT: ret i1 false @@ -584,6 +1090,16 @@ define i1 @auto_gen_42(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_42_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_42_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_43(double %a, double %b) { ; CHECK-LABEL: @auto_gen_43( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -595,6 +1111,17 @@ define i1 @auto_gen_43(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_43_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_43_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_44(double %a, double %b) { ; CHECK-LABEL: @auto_gen_44( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp 
ueq double [[A:%.*]], [[B:%.*]] @@ -606,6 +1133,17 @@ define i1 @auto_gen_44(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_44_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_44_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_45(double %a, double %b) { ; CHECK-LABEL: @auto_gen_45( ; CHECK-NEXT: ret i1 false @@ -616,6 +1154,16 @@ define i1 @auto_gen_45(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_45_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_45_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_46(double %a, double %b) { ; CHECK-LABEL: @auto_gen_46( ; CHECK-NEXT: ret i1 false @@ -626,6 +1174,16 @@ define i1 @auto_gen_46(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_46_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_46_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_47(double %a, double %b) { ; CHECK-LABEL: @auto_gen_47( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -637,6 +1195,17 @@ define i1 @auto_gen_47(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_47_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_47_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_48(double %a, double %b) { ; CHECK-LABEL: @auto_gen_48( ; CHECK-NEXT: 
[[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -648,6 +1217,17 @@ define i1 @auto_gen_48(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_48_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_48_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_49(double %a, double %b) { ; CHECK-LABEL: @auto_gen_49( ; CHECK-NEXT: ret i1 false @@ -658,6 +1238,16 @@ define i1 @auto_gen_49(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_49_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_49_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_50(double %a, double %b) { ; CHECK-LABEL: @auto_gen_50( ; CHECK-NEXT: ret i1 false @@ -668,6 +1258,16 @@ define i1 @auto_gen_50(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_50_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_50_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_51(double %a, double %b) { ; CHECK-LABEL: @auto_gen_51( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -679,6 +1279,17 @@ define i1 @auto_gen_51(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_51_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_51_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_52(double %a, double %b) { ; CHECK-LABEL: @auto_gen_52( ; 
CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -690,6 +1301,17 @@ define i1 @auto_gen_52(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_52_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_52_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_53(double %a, double %b) { ; CHECK-LABEL: @auto_gen_53( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -701,6 +1323,17 @@ define i1 @auto_gen_53(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_53_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_53_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_54(double %a, double %b) { ; CHECK-LABEL: @auto_gen_54( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -712,6 +1345,17 @@ define i1 @auto_gen_54(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_54_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_54_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_55(double %a, double %b) { ; CHECK-LABEL: @auto_gen_55( ; CHECK-NEXT: ret i1 false @@ -722,6 +1366,16 @@ define i1 @auto_gen_55(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_55_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_55_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 
%cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_56(double %a, double %b) { ; CHECK-LABEL: @auto_gen_56( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -733,6 +1387,17 @@ define i1 @auto_gen_56(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_56_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_56_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_57(double %a, double %b) { ; CHECK-LABEL: @auto_gen_57( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -744,6 +1409,17 @@ define i1 @auto_gen_57(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_57_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_57_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_58(double %a, double %b) { ; CHECK-LABEL: @auto_gen_58( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] @@ -755,6 +1431,17 @@ define i1 @auto_gen_58(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_58_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_58_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_59(double %a, double %b) { ; CHECK-LABEL: @auto_gen_59( ; CHECK-NEXT: ret i1 false @@ -765,6 +1452,16 @@ define i1 @auto_gen_59(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_59_logical(double %a, double %b) { +; CHECK-LABEL: 
@auto_gen_59_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_60(double %a, double %b) { ; CHECK-LABEL: @auto_gen_60( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -776,6 +1473,17 @@ define i1 @auto_gen_60(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_60_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_60_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_61(double %a, double %b) { ; CHECK-LABEL: @auto_gen_61( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -787,8 +1495,19 @@ define i1 @auto_gen_61(double %a, double %b) { ret i1 %retval } -define i1 @auto_gen_62(double %a, double %b) { -; CHECK-LABEL: @auto_gen_62( +define i1 @auto_gen_61_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_61_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + +define i1 @auto_gen_62(double %a, double %b) { +; CHECK-LABEL: @auto_gen_62( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[TMP1]] ; @@ -798,6 +1517,17 @@ define i1 @auto_gen_62(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_62_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_62_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_63(double 
%a, double %b) { ; CHECK-LABEL: @auto_gen_63( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -809,6 +1539,17 @@ define i1 @auto_gen_63(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_63_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_63_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_64(double %a, double %b) { ; CHECK-LABEL: @auto_gen_64( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -820,6 +1561,17 @@ define i1 @auto_gen_64(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_64_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_64_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_65(double %a, double %b) { ; CHECK-LABEL: @auto_gen_65( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -831,6 +1583,17 @@ define i1 @auto_gen_65(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_65_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_65_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_66(double %a, double %b) { ; CHECK-LABEL: @auto_gen_66( ; CHECK-NEXT: ret i1 false @@ -841,6 +1604,16 @@ define i1 @auto_gen_66(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_66_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_66_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ult double %a, %b + %cmp1 = 
fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_67(double %a, double %b) { ; CHECK-LABEL: @auto_gen_67( ; CHECK-NEXT: ret i1 false @@ -851,6 +1624,16 @@ define i1 @auto_gen_67(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_67_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_67_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_68(double %a, double %b) { ; CHECK-LABEL: @auto_gen_68( ; CHECK-NEXT: ret i1 false @@ -861,6 +1644,16 @@ define i1 @auto_gen_68(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_68_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_68_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_69(double %a, double %b) { ; CHECK-LABEL: @auto_gen_69( ; CHECK-NEXT: ret i1 false @@ -871,6 +1664,16 @@ define i1 @auto_gen_69(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_69_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_69_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_70(double %a, double %b) { ; CHECK-LABEL: @auto_gen_70( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -882,6 +1685,17 @@ define i1 @auto_gen_70(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_70_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_70_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + 
define i1 @auto_gen_71(double %a, double %b) { ; CHECK-LABEL: @auto_gen_71( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -893,6 +1707,17 @@ define i1 @auto_gen_71(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_71_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_71_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_72(double %a, double %b) { ; CHECK-LABEL: @auto_gen_72( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -904,6 +1729,17 @@ define i1 @auto_gen_72(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_72_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_72_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_73(double %a, double %b) { ; CHECK-LABEL: @auto_gen_73( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -915,6 +1751,17 @@ define i1 @auto_gen_73(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_73_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_73_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_74(double %a, double %b) { ; CHECK-LABEL: @auto_gen_74( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -926,6 +1773,17 @@ define i1 @auto_gen_74(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_74_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_74_logical( +; 
CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_75(double %a, double %b) { ; CHECK-LABEL: @auto_gen_75( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -937,6 +1795,17 @@ define i1 @auto_gen_75(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_75_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_75_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_76(double %a, double %b) { ; CHECK-LABEL: @auto_gen_76( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -948,6 +1817,17 @@ define i1 @auto_gen_76(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_76_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_76_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_77(double %a, double %b) { ; CHECK-LABEL: @auto_gen_77( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -959,6 +1839,17 @@ define i1 @auto_gen_77(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_77_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_77_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_78(double %a, double %b) { ; CHECK-LABEL: @auto_gen_78( ; CHECK-NEXT: ret i1 
false @@ -969,6 +1860,16 @@ define i1 @auto_gen_78(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_78_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_78_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_79(double %a, double %b) { ; CHECK-LABEL: @auto_gen_79( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -980,6 +1881,17 @@ define i1 @auto_gen_79(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_79_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_79_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_80(double %a, double %b) { ; CHECK-LABEL: @auto_gen_80( ; CHECK-NEXT: ret i1 false @@ -990,6 +1902,16 @@ define i1 @auto_gen_80(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_80_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_80_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_81(double %a, double %b) { ; CHECK-LABEL: @auto_gen_81( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -1001,6 +1923,17 @@ define i1 @auto_gen_81(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_81_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_81_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_82(double %a, double %b) { ; CHECK-LABEL: @auto_gen_82( ; 
CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -1012,6 +1945,17 @@ define i1 @auto_gen_82(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_82_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_82_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_83(double %a, double %b) { ; CHECK-LABEL: @auto_gen_83( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -1023,6 +1967,17 @@ define i1 @auto_gen_83(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_83_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_83_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_84(double %a, double %b) { ; CHECK-LABEL: @auto_gen_84( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -1034,6 +1989,17 @@ define i1 @auto_gen_84(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_84_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_84_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_85(double %a, double %b) { ; CHECK-LABEL: @auto_gen_85( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -1045,6 +2011,17 @@ define i1 @auto_gen_85(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_85_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_85_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; 
CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_86(double %a, double %b) { ; CHECK-LABEL: @auto_gen_86( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -1056,6 +2033,17 @@ define i1 @auto_gen_86(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_86_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_86_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_87(double %a, double %b) { ; CHECK-LABEL: @auto_gen_87( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1067,6 +2055,17 @@ define i1 @auto_gen_87(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_87_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_87_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_88(double %a, double %b) { ; CHECK-LABEL: @auto_gen_88( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -1078,6 +2077,17 @@ define i1 @auto_gen_88(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_88_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_88_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_89(double %a, double %b) { ; CHECK-LABEL: @auto_gen_89( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -1089,6 
+2099,17 @@ define i1 @auto_gen_89(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_89_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_89_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_90(double %a, double %b) { ; CHECK-LABEL: @auto_gen_90( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1100,6 +2121,17 @@ define i1 @auto_gen_90(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_90_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_90_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ule double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_91(double %a, double %b) { ; CHECK-LABEL: @auto_gen_91( ; CHECK-NEXT: ret i1 false @@ -1110,6 +2142,16 @@ define i1 @auto_gen_91(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_91_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_91_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_92(double %a, double %b) { ; CHECK-LABEL: @auto_gen_92( ; CHECK-NEXT: ret i1 false @@ -1120,6 +2162,16 @@ define i1 @auto_gen_92(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_92_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_92_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_93(double %a, double %b) { ; CHECK-LABEL: @auto_gen_93( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], 
[[B:%.*]] @@ -1131,6 +2183,17 @@ define i1 @auto_gen_93(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_93_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_93_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_94(double %a, double %b) { ; CHECK-LABEL: @auto_gen_94( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -1142,6 +2205,17 @@ define i1 @auto_gen_94(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_94_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_94_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_95(double %a, double %b) { ; CHECK-LABEL: @auto_gen_95( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -1153,6 +2227,17 @@ define i1 @auto_gen_95(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_95_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_95_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_96(double %a, double %b) { ; CHECK-LABEL: @auto_gen_96( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -1164,6 +2249,17 @@ define i1 @auto_gen_96(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_96_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_96_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + 
%cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_97(double %a, double %b) { ; CHECK-LABEL: @auto_gen_97( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -1175,6 +2271,17 @@ define i1 @auto_gen_97(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_97_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_97_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_98(double %a, double %b) { ; CHECK-LABEL: @auto_gen_98( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -1186,6 +2293,17 @@ define i1 @auto_gen_98(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_98_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_98_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_99(double %a, double %b) { ; CHECK-LABEL: @auto_gen_99( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1197,6 +2315,17 @@ define i1 @auto_gen_99(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_99_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_99_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_100(double %a, double %b) { ; CHECK-LABEL: @auto_gen_100( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -1208,6 +2337,17 @@ define i1 @auto_gen_100(double %a, double %b) { ret i1 
%retval } +define i1 @auto_gen_100_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_100_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_101(double %a, double %b) { ; CHECK-LABEL: @auto_gen_101( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -1219,6 +2359,17 @@ define i1 @auto_gen_101(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_101_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_101_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_102(double %a, double %b) { ; CHECK-LABEL: @auto_gen_102( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -1226,7 +2377,18 @@ define i1 @auto_gen_102(double %a, double %b) { ; %cmp = fcmp une double %a, %b %cmp1 = fcmp ult double %a, %b - %retval = and i1 %cmp, %cmp1 + %retval = and i1 %cmp, %cmp1 + ret i1 %retval +} + +define i1 @auto_gen_102_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_102_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false ret i1 %retval } @@ -1241,6 +2403,17 @@ define i1 @auto_gen_103(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_103_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_103_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ule double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 
%retval +} + define i1 @auto_gen_104(double %a, double %b) { ; CHECK-LABEL: @auto_gen_104( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1252,6 +2425,17 @@ define i1 @auto_gen_104(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_104_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_104_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp une double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_105(double %a, double %b) { ; CHECK-LABEL: @auto_gen_105( ; CHECK-NEXT: ret i1 false @@ -1262,6 +2446,16 @@ define i1 @auto_gen_105(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_105_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_105_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_106(double %a, double %b) { ; CHECK-LABEL: @auto_gen_106( ; CHECK-NEXT: ret i1 false @@ -1272,6 +2466,16 @@ define i1 @auto_gen_106(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_106_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_106_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_107(double %a, double %b) { ; CHECK-LABEL: @auto_gen_107( ; CHECK-NEXT: ret i1 false @@ -1282,6 +2486,16 @@ define i1 @auto_gen_107(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_107_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_107_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_108(double %a, double %b) { ; 
CHECK-LABEL: @auto_gen_108( ; CHECK-NEXT: ret i1 false @@ -1292,6 +2506,16 @@ define i1 @auto_gen_108(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_108_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_108_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_109(double %a, double %b) { ; CHECK-LABEL: @auto_gen_109( ; CHECK-NEXT: ret i1 false @@ -1302,6 +2526,16 @@ define i1 @auto_gen_109(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_109_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_109_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_110(double %a, double %b) { ; CHECK-LABEL: @auto_gen_110( ; CHECK-NEXT: ret i1 false @@ -1312,6 +2546,16 @@ define i1 @auto_gen_110(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_110_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_110_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_111(double %a, double %b) { ; CHECK-LABEL: @auto_gen_111( ; CHECK-NEXT: ret i1 false @@ -1322,6 +2566,16 @@ define i1 @auto_gen_111(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_111_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_111_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_112(double %a, double %b) { ; CHECK-LABEL: @auto_gen_112( ; CHECK-NEXT: ret i1 false @@ -1332,6 +2586,16 @@ define i1 @auto_gen_112(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_112_logical(double 
%a, double %b) { +; CHECK-LABEL: @auto_gen_112_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_113(double %a, double %b) { ; CHECK-LABEL: @auto_gen_113( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1343,6 +2607,17 @@ define i1 @auto_gen_113(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_113_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_113_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_114(double %a, double %b) { ; CHECK-LABEL: @auto_gen_114( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1354,6 +2629,17 @@ define i1 @auto_gen_114(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_114_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_114_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_115(double %a, double %b) { ; CHECK-LABEL: @auto_gen_115( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1365,6 +2651,17 @@ define i1 @auto_gen_115(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_115_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_115_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_116(double %a, double %b) { ; CHECK-LABEL: @auto_gen_116( ; 
CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1376,6 +2673,17 @@ define i1 @auto_gen_116(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_116_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_116_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_117(double %a, double %b) { ; CHECK-LABEL: @auto_gen_117( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1387,6 +2695,17 @@ define i1 @auto_gen_117(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_117_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_117_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ule double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_118(double %a, double %b) { ; CHECK-LABEL: @auto_gen_118( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1398,6 +2717,17 @@ define i1 @auto_gen_118(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_118_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_118_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp une double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_119(double %a, double %b) { ; CHECK-LABEL: @auto_gen_119( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1409,6 +2739,17 @@ define i1 @auto_gen_119(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_119_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_119_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], 
[[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp uno double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_120(double %a, double %b) { ; CHECK-LABEL: @auto_gen_120( ; CHECK-NEXT: ret i1 false @@ -1419,6 +2760,16 @@ define i1 @auto_gen_120(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_120_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_120_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_121(double %a, double %b) { ; CHECK-LABEL: @auto_gen_121( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -1430,6 +2781,17 @@ define i1 @auto_gen_121(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_121_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_121_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_122(double %a, double %b) { ; CHECK-LABEL: @auto_gen_122( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -1441,6 +2803,17 @@ define i1 @auto_gen_122(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_122_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_122_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_123(double %a, double %b) { ; CHECK-LABEL: @auto_gen_123( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] @@ -1452,6 +2825,17 @@ define i1 @auto_gen_123(double %a, double %b) { ret i1 %retval } 
+define i1 @auto_gen_123_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_123_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_124(double %a, double %b) { ; CHECK-LABEL: @auto_gen_124( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -1463,6 +2847,17 @@ define i1 @auto_gen_124(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_124_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_124_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_125(double %a, double %b) { ; CHECK-LABEL: @auto_gen_125( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -1474,6 +2869,17 @@ define i1 @auto_gen_125(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_125_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_125_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_126(double %a, double %b) { ; CHECK-LABEL: @auto_gen_126( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -1485,6 +2891,17 @@ define i1 @auto_gen_126(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_126_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_126_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 
false + ret i1 %retval +} + define i1 @auto_gen_127(double %a, double %b) { ; CHECK-LABEL: @auto_gen_127( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -1496,6 +2913,17 @@ define i1 @auto_gen_127(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_127_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_127_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_128(double %a, double %b) { ; CHECK-LABEL: @auto_gen_128( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -1507,6 +2935,17 @@ define i1 @auto_gen_128(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_128_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_128_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_129(double %a, double %b) { ; CHECK-LABEL: @auto_gen_129( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -1518,6 +2957,17 @@ define i1 @auto_gen_129(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_129_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_129_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_130(double %a, double %b) { ; CHECK-LABEL: @auto_gen_130( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -1529,6 +2979,17 @@ define i1 @auto_gen_130(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_130_logical(double %a, 
double %b) { +; CHECK-LABEL: @auto_gen_130_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_131(double %a, double %b) { ; CHECK-LABEL: @auto_gen_131( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -1540,6 +3001,17 @@ define i1 @auto_gen_131(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_131_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_131_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_132(double %a, double %b) { ; CHECK-LABEL: @auto_gen_132( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1551,6 +3023,17 @@ define i1 @auto_gen_132(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_132_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_132_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ule double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_133(double %a, double %b) { ; CHECK-LABEL: @auto_gen_133( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1562,6 +3045,17 @@ define i1 @auto_gen_133(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_133_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_133_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp une double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 
@auto_gen_134(double %a, double %b) { ; CHECK-LABEL: @auto_gen_134( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1573,6 +3067,17 @@ define i1 @auto_gen_134(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_134_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_134_logical( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp uno double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} + define i1 @auto_gen_135(double %a, double %b) { ; CHECK-LABEL: @auto_gen_135( ; CHECK-NEXT: ret i1 true @@ -1582,3 +3087,13 @@ define i1 @auto_gen_135(double %a, double %b) { %retval = and i1 %cmp, %cmp1 ret i1 %retval } + +define i1 @auto_gen_135_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_135_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp true double %a, %b + %retval = select i1 %cmp, i1 %cmp1, i1 false + ret i1 %retval +} diff --git a/llvm/test/Transforms/InstCombine/and-or-icmp-min-max.ll b/llvm/test/Transforms/InstCombine/and-or-icmp-min-max.ll index 7f07147b14ffca..e24bb581230273 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmp-min-max.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmp-min-max.ll @@ -23,6 +23,16 @@ define i1 @slt_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_and_max_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp slt i8 %x, %y + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define <2 x i1> @slt_and_max_commute(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @slt_and_max_commute( ; CHECK-NEXT: ret <2 x i1> zeroinitializer @@ -43,6 +53,16 @@ define i1 @slt_swap_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_swap_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_swap_and_max_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp 
sgt i8 %y, %x + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @slt_swap_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_and_max_commute( ; CHECK-NEXT: ret i1 false @@ -53,6 +73,16 @@ define i1 @slt_swap_and_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_swap_and_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_swap_and_max_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp sgt i8 %y, %x + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ult_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_and_max( ; CHECK-NEXT: ret i1 false @@ -63,6 +93,16 @@ define i1 @ult_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_and_max_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ult i8 %x, %y + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ult_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_and_max_commute( ; CHECK-NEXT: ret i1 false @@ -73,6 +113,16 @@ define i1 @ult_and_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_and_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_and_max_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ult i8 %x, %y + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ult_swap_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_and_max( ; CHECK-NEXT: ret i1 false @@ -83,6 +133,16 @@ define i1 @ult_swap_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_swap_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_swap_and_max_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ugt i8 %y, %x + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ult_swap_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_and_max_commute( ; CHECK-NEXT: ret i1 false @@ -93,6 +153,16 @@ define i1 
@ult_swap_and_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_swap_and_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_swap_and_max_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ugt i8 %y, %x + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X == MIN) && (X > Y) --> false @@ -109,6 +179,16 @@ define i1 @sgt_and_min(i9 %x, i9 %y) { ret i1 %r } +define i1 @sgt_and_min_logical(i9 %x, i9 %y) { +; CHECK-LABEL: @sgt_and_min_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp sgt i9 %x, %y + %cmpeq = icmp eq i9 %x, 256 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sgt_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_and_min_commute( ; CHECK-NEXT: ret i1 false @@ -119,6 +199,16 @@ define i1 @sgt_and_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_and_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_and_min_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp sgt i8 %x, %y + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @sgt_swap_and_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_and_min( ; CHECK-NEXT: ret i1 false @@ -129,6 +219,16 @@ define i1 @sgt_swap_and_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_swap_and_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_swap_and_min_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp slt i8 %y, %x + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sgt_swap_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_and_min_commute( ; CHECK-NEXT: ret i1 false @@ -139,6 +239,16 @@ define i1 @sgt_swap_and_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_swap_and_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_swap_and_min_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp slt i8 %y, %x + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmpeq, 
i1 %cmp, i1 false + ret i1 %r +} + define i1 @ugt_and_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_and_min( ; CHECK-NEXT: ret i1 false @@ -149,6 +259,16 @@ define i1 @ugt_and_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_and_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_and_min_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ugt i8 %x, %y + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ugt_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_and_min_commute( ; CHECK-NEXT: ret i1 false @@ -159,6 +279,16 @@ define i1 @ugt_and_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_and_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_and_min_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ugt i8 %x, %y + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ugt_swap_and_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_swap_and_min( ; CHECK-NEXT: ret i1 false @@ -169,6 +299,16 @@ define i1 @ugt_swap_and_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_swap_and_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_swap_and_min_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ult i8 %y, %x + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ugt_swap_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_swap_and_min_commute( ; CHECK-NEXT: ret i1 false @@ -179,6 +319,16 @@ define i1 @ugt_swap_and_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_swap_and_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_swap_and_min_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ult i8 %y, %x + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != MAX) || (X >= Y) --> true @@ -195,6 +345,16 @@ define i1 @sge_or_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_or_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: 
@sge_or_not_max_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp sge i8 %x, %y + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sge_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_or_not_max_commute( ; CHECK-NEXT: ret i1 true @@ -205,6 +365,16 @@ define i1 @sge_or_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_or_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_or_not_max_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp sge i8 %x, %y + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @sge_swap_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_or_not_max( ; CHECK-NEXT: ret i1 true @@ -215,6 +385,16 @@ define i1 @sge_swap_or_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_swap_or_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_swap_or_not_max_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp sle i8 %y, %x + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sge_swap_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_or_not_max_commute( ; CHECK-NEXT: ret i1 true @@ -225,6 +405,16 @@ define i1 @sge_swap_or_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_swap_or_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_swap_or_not_max_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp sle i8 %y, %x + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @uge_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_or_not_max( ; CHECK-NEXT: ret i1 true @@ -235,6 +425,16 @@ define i1 @uge_or_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_or_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_or_not_max_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp uge i8 %x, %y + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @uge_or_not_max_commute(i8 %x, 
i8 %y) { ; CHECK-LABEL: @uge_or_not_max_commute( ; CHECK-NEXT: ret i1 true @@ -245,6 +445,16 @@ define i1 @uge_or_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_or_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_or_not_max_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp uge i8 %x, %y + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @uge_swap_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_or_not_max( ; CHECK-NEXT: ret i1 true @@ -255,6 +465,16 @@ define i1 @uge_swap_or_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_swap_or_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_swap_or_not_max_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ule i8 %y, %x + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @uge_swap_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_or_not_max_commute( ; CHECK-NEXT: ret i1 true @@ -265,6 +485,16 @@ define i1 @uge_swap_or_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_swap_or_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_swap_or_not_max_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ule i8 %y, %x + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != MIN) || (X <= Y) --> true @@ -281,6 +511,16 @@ define i1 @sle_or_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_or_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_or_not_min_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp sle i8 %x, %y + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sle_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_or_not_min_commute( ; CHECK-NEXT: ret i1 true @@ -291,6 +531,16 @@ define i1 @sle_or_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_or_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: 
@sle_or_not_min_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp sle i8 %x, %y + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @sle_swap_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_or_not_min( ; CHECK-NEXT: ret i1 true @@ -301,6 +551,16 @@ define i1 @sle_swap_or_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_swap_or_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_swap_or_not_min_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp sge i8 %y, %x + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sle_swap_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_or_not_min_commute( ; CHECK-NEXT: ret i1 true @@ -311,6 +571,16 @@ define i1 @sle_swap_or_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_swap_or_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_swap_or_not_min_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp sge i8 %y, %x + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ule_or_not_min(i427 %x, i427 %y) { ; CHECK-LABEL: @ule_or_not_min( ; CHECK-NEXT: ret i1 true @@ -321,6 +591,16 @@ define i1 @ule_or_not_min(i427 %x, i427 %y) { ret i1 %r } +define i1 @ule_or_not_min_logical(i427 %x, i427 %y) { +; CHECK-LABEL: @ule_or_not_min_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ule i427 %x, %y + %cmpeq = icmp ne i427 %x, 0 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ule_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_or_not_min_commute( ; CHECK-NEXT: ret i1 true @@ -331,6 +611,16 @@ define i1 @ule_or_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_or_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_or_not_min_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ule i8 %x, %y + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 
@ule_swap_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_swap_or_not_min( ; CHECK-NEXT: ret i1 true @@ -341,6 +631,16 @@ define i1 @ule_swap_or_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_swap_or_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_swap_or_not_min_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp uge i8 %y, %x + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ule_swap_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_swap_or_not_min_commute( ; CHECK-NEXT: ret i1 true @@ -351,6 +651,16 @@ define i1 @ule_swap_or_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_swap_or_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_swap_or_not_min_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp uge i8 %y, %x + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X == MAX) && (X >= Y) --> X == MAX @@ -368,6 +678,17 @@ define i1 @sge_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_and_max_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sge i8 %x, %y + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sge_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_and_max_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 @@ -379,6 +700,17 @@ define i1 @sge_and_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_and_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_and_max_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sge i8 %x, %y + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @sge_swap_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_and_max( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq 
i8 [[X:%.*]], 127 @@ -390,6 +722,17 @@ define i1 @sge_swap_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_swap_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_swap_and_max_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sle i8 %y, %x + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sge_swap_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_and_max_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 @@ -401,6 +744,17 @@ define i1 @sge_swap_and_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_swap_and_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_swap_and_max_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sle i8 %y, %x + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @uge_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_and_max( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 @@ -412,19 +766,41 @@ define i1 @uge_and_max(i8 %x, i8 %y) { ret i1 %r } -define i1 @uge_and_max_commute(i8 %x, i8 %y) { -; CHECK-LABEL: @uge_and_max_commute( +define i1 @uge_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_and_max_logical( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMPEQ]] ; %cmp = icmp uge i8 %x, %y %cmpeq = icmp eq i8 %x, 255 - %r = and i1 %cmpeq, %cmp + %r = select i1 %cmp, i1 %cmpeq, i1 false ret i1 %r } -define i1 @uge_swap_and_max(i8 %x, i8 %y) { -; CHECK-LABEL: @uge_swap_and_max( +define i1 @uge_and_max_commute(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_and_max_commute( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp uge i8 %x, %y + %cmpeq = icmp eq i8 %x, 255 + %r = and i1 %cmpeq, %cmp + ret i1 %r +} + +define i1 @uge_and_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: 
@uge_and_max_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp uge i8 %x, %y + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + +define i1 @uge_swap_and_max(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_swap_and_max( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMPEQ]] ; @@ -434,6 +810,17 @@ define i1 @uge_swap_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_swap_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_swap_and_max_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ule i8 %y, %x + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @uge_swap_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_and_max_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 @@ -445,6 +832,17 @@ define i1 @uge_swap_and_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_swap_and_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_swap_and_max_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ule i8 %y, %x + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X == MIN) && (X <= Y) --> X == MIN @@ -462,6 +860,17 @@ define i1 @sle_and_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_and_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sle i8 %x, %y + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sle_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_and_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 @@ -473,6 +882,17 @@ define i1 @sle_and_min_commute(i8 %x, i8 %y) 
{ ret i1 %r } +define i1 @sle_and_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_and_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sle i8 %x, %y + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @sle_swap_and_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_and_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 @@ -484,6 +904,17 @@ define i1 @sle_swap_and_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_swap_and_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_swap_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sge i8 %y, %x + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sle_swap_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_and_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 @@ -495,6 +926,17 @@ define i1 @sle_swap_and_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_swap_and_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_swap_and_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sge i8 %y, %x + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ule_and_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_and_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 @@ -506,6 +948,17 @@ define i1 @ule_and_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_and_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ule i8 %x, %y + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ule_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_and_min_commute( ; CHECK-NEXT: 
[[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 @@ -517,6 +970,17 @@ define i1 @ule_and_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_and_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_and_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ule i8 %x, %y + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ule_swap_and_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_swap_and_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 @@ -528,6 +992,17 @@ define i1 @ule_swap_and_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_swap_and_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_swap_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp uge i8 %y, %x + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ule_swap_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_swap_and_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 @@ -539,6 +1014,17 @@ define i1 @ule_swap_and_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_swap_and_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_swap_and_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp uge i8 %y, %x + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X == MAX) || (X >= Y) --> X >= Y @@ -556,6 +1042,17 @@ define i1 @sge_or_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_or_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_or_max_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sge i8 %x, %y + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sge_or_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: 
@sge_or_max_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] @@ -567,6 +1064,17 @@ define i1 @sge_or_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_or_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_or_max_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sge i8 %x, %y + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @sge_swap_or_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_or_max( ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]] @@ -578,6 +1086,17 @@ define i1 @sge_swap_or_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_swap_or_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_swap_or_max_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sle i8 %y, %x + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sge_swap_or_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_or_max_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]] @@ -589,6 +1108,17 @@ define i1 @sge_swap_or_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_swap_or_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_swap_or_max_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sle i8 %y, %x + %cmpeq = icmp eq i8 %x, 127 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @uge_or_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_or_max( ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] @@ -600,6 +1130,17 @@ define i1 @uge_or_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_or_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_or_max_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp uge i8 %x, %y + %cmpeq = icmp eq i8 %x, 255 + %r = 
select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @uge_or_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_or_max_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] @@ -611,6 +1152,17 @@ define i1 @uge_or_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_or_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_or_max_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp uge i8 %x, %y + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @uge_swap_or_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_or_max( ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]] @@ -622,6 +1174,17 @@ define i1 @uge_swap_or_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_swap_or_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_swap_or_max_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ule i8 %y, %x + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @uge_swap_or_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_or_max_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]] @@ -633,6 +1196,17 @@ define i1 @uge_swap_or_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @uge_swap_or_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @uge_swap_or_max_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ule i8 %y, %x + %cmpeq = icmp eq i8 %x, 255 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X == MIN) || (X <= Y) --> X <= Y @@ -650,6 +1224,17 @@ define i1 @sle_or_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_or_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_or_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + 
%cmp = icmp sle i8 %x, %y + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sle_or_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_or_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[X:%.*]], [[Y:%.*]] @@ -661,6 +1246,17 @@ define i1 @sle_or_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_or_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_or_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sle i8 %x, %y + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @sle_swap_or_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_or_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[X:%.*]] @@ -672,6 +1268,17 @@ define i1 @sle_swap_or_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_swap_or_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_swap_or_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sge i8 %y, %x + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sle_swap_or_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_or_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[X:%.*]] @@ -683,6 +1290,17 @@ define i1 @sle_swap_or_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sle_swap_or_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sle_swap_or_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sge i8 %y, %x + %cmpeq = icmp eq i8 %x, 128 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ule_or_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_or_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[X:%.*]], [[Y:%.*]] @@ -694,6 +1312,17 @@ define i1 @ule_or_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_or_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: 
@ule_or_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ule i8 %x, %y + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ule_or_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_or_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[X:%.*]], [[Y:%.*]] @@ -705,6 +1334,17 @@ define i1 @ule_or_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_or_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_or_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ule i8 %x, %y + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ule_swap_or_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_swap_or_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[Y:%.*]], [[X:%.*]] @@ -716,6 +1356,17 @@ define i1 @ule_swap_or_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_swap_or_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_swap_or_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp uge i8 %y, %x + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ule_swap_or_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_swap_or_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[Y:%.*]], [[X:%.*]] @@ -727,6 +1378,17 @@ define i1 @ule_swap_or_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ule_swap_or_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ule_swap_or_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp uge i8 %y, %x + %cmpeq = icmp eq i8 %x, 0 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != MAX) && (X < Y) --> X < Y @@ -744,6 +1406,17 @@ define i1 @slt_and_not_max(i8 %x, i8 %y) { ret i1 %r } +define 
i1 @slt_and_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_and_not_max_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp slt i8 %x, %y + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @slt_and_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_and_not_max_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]] @@ -755,6 +1428,17 @@ define i1 @slt_and_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_and_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_and_not_max_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp slt i8 %x, %y + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @slt_swap_and_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_and_not_max( ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]] @@ -766,6 +1450,17 @@ define i1 @slt_swap_and_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_swap_and_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_swap_and_not_max_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sgt i8 %y, %x + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @slt_swap_and_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_and_not_max_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]] @@ -777,6 +1472,17 @@ define i1 @slt_swap_and_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_swap_and_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_swap_and_not_max_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sgt i8 %y, %x + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define 
i1 @ult_and_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_and_not_max( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] @@ -788,6 +1494,17 @@ define i1 @ult_and_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_and_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_and_not_max_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ult i8 %x, %y + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ult_and_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_and_not_max_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] @@ -799,6 +1516,17 @@ define i1 @ult_and_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_and_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_and_not_max_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ult i8 %x, %y + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ult_swap_and_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_and_not_max( ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]] @@ -810,6 +1538,17 @@ define i1 @ult_swap_and_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_swap_and_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_swap_and_not_max_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ugt i8 %y, %x + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ult_swap_and_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_and_not_max_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]] @@ -821,6 +1560,17 @@ define i1 @ult_swap_and_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_swap_and_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_swap_and_not_max_commute_logical( +; 
CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ugt i8 %y, %x + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != MIN) && (X > Y) --> X > Y @@ -838,6 +1588,17 @@ define i1 @sgt_and_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_and_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_and_not_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sgt i8 %x, %y + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sgt_and_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_and_not_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]] @@ -849,6 +1610,17 @@ define i1 @sgt_and_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_and_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_and_not_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sgt i8 %x, %y + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @sgt_swap_and_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_and_not_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]] @@ -860,6 +1632,17 @@ define i1 @sgt_swap_and_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_swap_and_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_swap_and_not_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp slt i8 %y, %x + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sgt_swap_and_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_and_not_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]] @@ -871,6 +1654,17 @@ define i1 
@sgt_swap_and_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_swap_and_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_swap_and_not_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp slt i8 %y, %x + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ugt_and_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_and_not_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] @@ -882,6 +1676,17 @@ define i1 @ugt_and_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_and_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_and_not_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ugt i8 %x, %y + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ugt_and_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_and_not_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] @@ -893,6 +1698,17 @@ define i1 @ugt_and_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_and_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_and_not_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ugt i8 %x, %y + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ugt_swap_and_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_swap_and_not_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[Y:%.*]], [[X:%.*]] @@ -904,6 +1720,17 @@ define i1 @ugt_swap_and_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_swap_and_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_swap_and_not_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ult i8 %y, %x + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmp, i1 %cmpeq, i1 false + 
ret i1 %r +} + define i1 @ugt_swap_and_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_swap_and_not_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[Y:%.*]], [[X:%.*]] @@ -915,6 +1742,17 @@ define i1 @ugt_swap_and_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_swap_and_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_swap_and_not_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ult i8 %y, %x + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != MAX) || (X < Y) --> X != MAX @@ -932,6 +1770,17 @@ define i1 @slt_or_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_or_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_or_not_max_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp slt i8 %x, %y + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @slt_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_or_not_max_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 @@ -943,6 +1792,17 @@ define i1 @slt_or_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_or_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_or_not_max_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp slt i8 %x, %y + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @slt_swap_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_or_not_max( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 @@ -954,6 +1814,17 @@ define i1 @slt_swap_or_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_swap_or_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_swap_or_not_max_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 +; CHECK-NEXT: ret 
i1 [[CMPEQ]] +; + %cmp = icmp sgt i8 %y, %x + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @slt_swap_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_or_not_max_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 @@ -965,6 +1836,17 @@ define i1 @slt_swap_or_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_swap_or_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_swap_or_not_max_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sgt i8 %y, %x + %cmpeq = icmp ne i8 %x, 127 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ult_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_or_not_max( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 @@ -976,6 +1858,17 @@ define i1 @ult_or_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_or_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_or_not_max_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ult i8 %x, %y + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ult_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_or_not_max_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 @@ -987,6 +1880,17 @@ define i1 @ult_or_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_or_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_or_not_max_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ult i8 %x, %y + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ult_swap_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_or_not_max( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 @@ -998,6 +1902,17 @@ define i1 @ult_swap_or_not_max(i8 %x, i8 %y) { ret i1 %r } +define i1 
@ult_swap_or_not_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_swap_or_not_max_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ugt i8 %y, %x + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ult_swap_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_or_not_max_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 @@ -1009,6 +1924,17 @@ define i1 @ult_swap_or_not_max_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ult_swap_or_not_max_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_swap_or_not_max_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ugt i8 %y, %x + %cmpeq = icmp ne i8 %x, 255 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != MIN) || (X > Y) --> X != MIN @@ -1026,6 +1952,17 @@ define i1 @sgt_or_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_or_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_or_not_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sgt i8 %x, %y + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sgt_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_or_not_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 @@ -1037,6 +1974,17 @@ define i1 @sgt_or_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_or_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_or_not_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp sgt i8 %x, %y + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @sgt_swap_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_or_not_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne 
i8 [[X:%.*]], -128 @@ -1048,6 +1996,17 @@ define i1 @sgt_swap_or_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_swap_or_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_swap_or_not_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp slt i8 %y, %x + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sgt_swap_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_or_not_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 @@ -1059,6 +2018,17 @@ define i1 @sgt_swap_or_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @sgt_swap_or_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sgt_swap_or_not_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp slt i8 %y, %x + %cmpeq = icmp ne i8 %x, 128 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ugt_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_or_not_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 @@ -1070,6 +2040,17 @@ define i1 @ugt_or_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_or_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_or_not_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ugt i8 %x, %y + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ugt_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_or_not_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 @@ -1081,6 +2062,17 @@ define i1 @ugt_or_not_min_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_or_not_min_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_or_not_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ugt i8 %x, %y + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmpeq, 
i1 true, i1 %cmp + ret i1 %r +} + define i1 @ugt_swap_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_swap_or_not_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 @@ -1092,6 +2084,17 @@ define i1 @ugt_swap_or_not_min(i8 %x, i8 %y) { ret i1 %r } +define i1 @ugt_swap_or_not_min_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @ugt_swap_or_not_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ult i8 %y, %x + %cmpeq = icmp ne i8 %x, 0 + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ugt_swap_or_not_min_commute(i823 %x, i823 %y) { ; CHECK-LABEL: @ugt_swap_or_not_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i823 [[X:%.*]], 0 @@ -1102,3 +2105,14 @@ define i1 @ugt_swap_or_not_min_commute(i823 %x, i823 %y) { %r = or i1 %cmpeq, %cmp ret i1 %r } + +define i1 @ugt_swap_or_not_min_commute_logical(i823 %x, i823 %y) { +; CHECK-LABEL: @ugt_swap_or_not_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i823 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ult i823 %y, %x + %cmpeq = icmp ne i823 %x, 0 + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} diff --git a/llvm/test/Transforms/InstCombine/and-or-icmp-nullptr.ll b/llvm/test/Transforms/InstCombine/and-or-icmp-nullptr.ll index be573281eb8c02..b8a43c57bf5800 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmp-nullptr.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmp-nullptr.ll @@ -26,6 +26,16 @@ define i1 @ugt_and_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_and_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_and_min_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ugt i8* %x, %y + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ugt_and_min_commute(<2 x i8>* %x, <2 x i8>* %y) { ; CHECK-LABEL: @ugt_and_min_commute( ; CHECK-NEXT: ret i1 false @@ -36,6 +46,16 @@ define i1 @ugt_and_min_commute(<2 x i8>* %x, <2 x i8>* %y) { ret i1 %r } 
+define i1 @ugt_and_min_commute_logical(<2 x i8>* %x, <2 x i8>* %y) { +; CHECK-LABEL: @ugt_and_min_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ugt <2 x i8>* %x, %y + %cmpeq = icmp eq <2 x i8>* %x, null + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ugt_swap_and_min(i8* %x, i8* %y) { ; CHECK-LABEL: @ugt_swap_and_min( ; CHECK-NEXT: ret i1 false @@ -46,6 +66,16 @@ define i1 @ugt_swap_and_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_swap_and_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_swap_and_min_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ult i8* %y, %x + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ugt_swap_and_min_commute(i8* %x, i8* %y) { ; CHECK-LABEL: @ugt_swap_and_min_commute( ; CHECK-NEXT: ret i1 false @@ -56,6 +86,16 @@ define i1 @ugt_swap_and_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_swap_and_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_swap_and_min_commute_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ult i8* %y, %x + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != null) || (X <= Y) --> true @@ -72,6 +112,16 @@ define i1 @ule_or_not_min(i427* %x, i427* %y) { ret i1 %r } +define i1 @ule_or_not_min_logical(i427* %x, i427* %y) { +; CHECK-LABEL: @ule_or_not_min_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ule i427* %x, %y + %cmpeq = icmp ne i427* %x, null + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ule_or_not_min_commute(<3 x i9>* %x, <3 x i9>* %y) { ; CHECK-LABEL: @ule_or_not_min_commute( ; CHECK-NEXT: ret i1 true @@ -82,6 +132,16 @@ define i1 @ule_or_not_min_commute(<3 x i9>* %x, <3 x i9>* %y) { ret i1 %r } +define i1 @ule_or_not_min_commute_logical(<3 x i9>* %x, <3 x i9>* %y) { +; CHECK-LABEL: @ule_or_not_min_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ule <3 x 
i9>* %x, %y + %cmpeq = icmp ne <3 x i9>* %x, null + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ule_swap_or_not_min(i8* %x, i8* %y) { ; CHECK-LABEL: @ule_swap_or_not_min( ; CHECK-NEXT: ret i1 true @@ -92,6 +152,16 @@ define i1 @ule_swap_or_not_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_swap_or_not_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_swap_or_not_min_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp uge i8* %y, %x + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ule_swap_or_not_min_commute(i8* %x, i8* %y) { ; CHECK-LABEL: @ule_swap_or_not_min_commute( ; CHECK-NEXT: ret i1 true @@ -102,6 +172,16 @@ define i1 @ule_swap_or_not_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_swap_or_not_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_swap_or_not_min_commute_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp uge i8* %y, %x + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X == null) && (X <= Y) --> X == null @@ -119,6 +199,17 @@ define i1 @ule_and_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_and_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ule i8* %x, %y + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ule_and_min_commute(i8* %x, i8* %y) { ; CHECK-LABEL: @ule_and_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null @@ -130,6 +221,17 @@ define i1 @ule_and_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_and_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_and_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ule i8* %x, %y + %cmpeq = icmp eq i8* %x, null + %r = 
select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ule_swap_and_min(i8* %x, i8* %y) { ; CHECK-LABEL: @ule_swap_and_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null @@ -141,6 +243,17 @@ define i1 @ule_swap_and_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_swap_and_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_swap_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp uge i8* %y, %x + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ule_swap_and_min_commute(i8* %x, i8* %y) { ; CHECK-LABEL: @ule_swap_and_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null @@ -152,6 +265,17 @@ define i1 @ule_swap_and_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_swap_and_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_swap_and_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp uge i8* %y, %x + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X == null) || (X <= Y) --> X <= Y @@ -169,6 +293,17 @@ define i1 @ule_or_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_or_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_or_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8* [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ule i8* %x, %y + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ule_or_min_commute(i8* %x, i8* %y) { ; CHECK-LABEL: @ule_or_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8* [[X:%.*]], [[Y:%.*]] @@ -180,6 +315,17 @@ define i1 @ule_or_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_or_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_or_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8* [[X:%.*]], [[Y:%.*]] 
+; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ule i8* %x, %y + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ule_swap_or_min(i8* %x, i8* %y) { ; CHECK-LABEL: @ule_swap_or_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8* [[Y:%.*]], [[X:%.*]] @@ -191,6 +337,17 @@ define i1 @ule_swap_or_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_swap_or_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_swap_or_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8* [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp uge i8* %y, %x + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ule_swap_or_min_commute(i8* %x, i8* %y) { ; CHECK-LABEL: @ule_swap_or_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8* [[Y:%.*]], [[X:%.*]] @@ -202,6 +359,17 @@ define i1 @ule_swap_or_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 @ule_swap_or_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ule_swap_or_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8* [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp uge i8* %y, %x + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != null) && (X > Y) --> X > Y @@ -219,6 +387,17 @@ define i1 @ugt_and_not_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_and_not_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_and_not_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ugt i8* %x, %y + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ugt_and_not_min_commute(i8* %x, i8* %y) { ; CHECK-LABEL: @ugt_and_not_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[X:%.*]], [[Y:%.*]] @@ -230,6 +409,17 @@ define i1 @ugt_and_not_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 
@ugt_and_not_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_and_not_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ugt i8* %x, %y + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + define i1 @ugt_swap_and_not_min(i8* %x, i8* %y) { ; CHECK-LABEL: @ugt_swap_and_not_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[Y:%.*]], [[X:%.*]] @@ -241,6 +431,17 @@ define i1 @ugt_swap_and_not_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_swap_and_not_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_swap_and_not_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ult i8* %y, %x + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @ugt_swap_and_not_min_commute(i8* %x, i8* %y) { ; CHECK-LABEL: @ugt_swap_and_not_min_commute( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[Y:%.*]], [[X:%.*]] @@ -252,6 +453,17 @@ define i1 @ugt_swap_and_not_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_swap_and_not_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_swap_and_not_min_commute_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ult i8* %y, %x + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; (X != null) || (X > Y) --> X != null @@ -269,6 +481,17 @@ define i1 @ugt_or_not_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_or_not_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_or_not_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8* [[X:%.*]], null +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ugt i8* %x, %y + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ugt_or_not_min_commute(i8* %x, i8* %y) { ; 
CHECK-LABEL: @ugt_or_not_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8* [[X:%.*]], null @@ -280,6 +503,17 @@ define i1 @ugt_or_not_min_commute(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_or_not_min_commute_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_or_not_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8* [[X:%.*]], null +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ugt i8* %x, %y + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @ugt_swap_or_not_min(i8* %x, i8* %y) { ; CHECK-LABEL: @ugt_swap_or_not_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8* [[X:%.*]], null @@ -291,6 +525,17 @@ define i1 @ugt_swap_or_not_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @ugt_swap_or_not_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @ugt_swap_or_not_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8* [[X:%.*]], null +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ult i8* %y, %x + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @ugt_swap_or_not_min_commute(i823* %x, i823* %y) { ; CHECK-LABEL: @ugt_swap_or_not_min_commute( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i823* [[X:%.*]], null @@ -302,6 +547,17 @@ define i1 @ugt_swap_or_not_min_commute(i823* %x, i823* %y) { ret i1 %r } +define i1 @ugt_swap_or_not_min_commute_logical(i823* %x, i823* %y) { +; CHECK-LABEL: @ugt_swap_or_not_min_commute_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i823* [[X:%.*]], null +; CHECK-NEXT: ret i1 [[CMPEQ]] +; + %cmp = icmp ult i823* %y, %x + %cmpeq = icmp ne i823* %x, null + %r = select i1 %cmpeq, i1 true, i1 %cmp + ret i1 %r +} + define i1 @sgt_and_min(i9* %x, i9* %y) { ; CHECK-LABEL: @sgt_and_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i9* [[X:%.*]], null @@ -315,6 +571,19 @@ define i1 @sgt_and_min(i9* %x, i9* %y) { ret i1 %r } +define i1 @sgt_and_min_logical(i9* %x, i9* %y) { +; CHECK-LABEL: @sgt_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i9* [[X:%.*]], 
null +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i9* [[Y:%.*]], null +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CMPEQ]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp sgt i9* %x, %y + %cmpeq = icmp eq i9* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sle_or_not_min(i427* %x, i427* %y) { ; CHECK-LABEL: @sle_or_not_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i427* [[X:%.*]], null @@ -328,6 +597,19 @@ define i1 @sle_or_not_min(i427* %x, i427* %y) { ret i1 %r } +define i1 @sle_or_not_min_logical(i427* %x, i427* %y) { +; CHECK-LABEL: @sle_or_not_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i427* [[X:%.*]], null +; CHECK-NEXT: [[TMP1:%.*]] = icmp sge i427* [[Y:%.*]], null +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[CMPEQ]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp sle i427* %x, %y + %cmpeq = icmp ne i427* %x, null + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @sle_and_min(i8* %x, i8* %y) { ; CHECK-LABEL: @sle_and_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null @@ -341,6 +623,19 @@ define i1 @sle_and_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @sle_and_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @sle_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[X:%.*]], null +; CHECK-NEXT: [[TMP1:%.*]] = icmp sge i8* [[Y:%.*]], null +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CMPEQ]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp sle i8* %x, %y + %cmpeq = icmp eq i8* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sgt_and_not_min(i8* %x, i8* %y) { ; CHECK-LABEL: @sgt_and_not_min( ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8* [[X:%.*]], [[Y:%.*]] @@ -354,6 +649,19 @@ define i1 @sgt_and_not_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @sgt_and_not_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @sgt_and_not_min_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8* [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8* [[X]], null +; CHECK-NEXT: 
[[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] +; + %cmp = icmp sgt i8* %x, %y + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmp, i1 %cmpeq, i1 false + ret i1 %r +} + define i1 @sgt_or_not_min(i8* %x, i8* %y) { ; CHECK-LABEL: @sgt_or_not_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8* [[X:%.*]], null @@ -367,6 +675,19 @@ define i1 @sgt_or_not_min(i8* %x, i8* %y) { ret i1 %r } +define i1 @sgt_or_not_min_logical(i8* %x, i8* %y) { +; CHECK-LABEL: @sgt_or_not_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8* [[X:%.*]], null +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8* [[Y:%.*]], null +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[CMPEQ]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp sgt i8* %x, %y + %cmpeq = icmp ne i8* %x, null + %r = select i1 %cmp, i1 true, i1 %cmpeq + ret i1 %r +} + define i1 @slt_and_min(i8* %a, i8* %b) { ; CHECK-LABEL: @slt_and_min( ; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[A:%.*]], null @@ -379,3 +700,16 @@ define i1 @slt_and_min(i8* %a, i8* %b) { %r = and i1 %cmpeq, %cmp ret i1 %r } + +define i1 @slt_and_min_logical(i8* %a, i8* %b) { +; CHECK-LABEL: @slt_and_min_logical( +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8* [[A:%.*]], null +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8* [[B:%.*]], null +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CMPEQ]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmpeq = icmp eq i8* %a, null + %cmp = icmp slt i8* %a, %b + %r = select i1 %cmpeq, i1 %cmp, i1 false + ret i1 %r +} diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index a62790bd57f4b7..0e8f0ca7bf9676 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -14,6 +14,17 @@ define i1 @PR1817_1(i32 %X) { ret i1 %C } +define i1 @PR1817_1_logical(i32 %X) { +; CHECK-LABEL: @PR1817_1_logical( +; CHECK-NEXT: [[B:%.*]] = icmp ult i32 [[X:%.*]], 10 +; CHECK-NEXT: ret i1 [[B]] +; + %A = icmp slt i32 %X, 10 + %B = icmp ult 
i32 %X, 10 + %C = select i1 %A, i1 %B, i1 false + ret i1 %C +} + define i1 @PR1817_2(i32 %X) { ; CHECK-LABEL: @PR1817_2( ; CHECK-NEXT: [[A:%.*]] = icmp slt i32 [[X:%.*]], 10 @@ -25,6 +36,17 @@ define i1 @PR1817_2(i32 %X) { ret i1 %C } +define i1 @PR1817_2_logical(i32 %X) { +; CHECK-LABEL: @PR1817_2_logical( +; CHECK-NEXT: [[A:%.*]] = icmp slt i32 [[X:%.*]], 10 +; CHECK-NEXT: ret i1 [[A]] +; + %A = icmp slt i32 %X, 10 + %B = icmp ult i32 %X, 10 + %C = select i1 %A, i1 true, i1 %B + ret i1 %C +} + define i1 @PR2330(i32 %a, i32 %b) { ; CHECK-LABEL: @PR2330( ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] @@ -37,6 +59,18 @@ define i1 @PR2330(i32 %a, i32 %b) { ret i1 %and } +define i1 @PR2330_logical(i32 %a, i32 %b) { +; CHECK-LABEL: @PR2330_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 8 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp ult i32 %a, 8 + %cmp2 = icmp ult i32 %b, 8 + %and = select i1 %cmp2, i1 %cmp1, i1 false + ret i1 %and +} + ; if LHSC and RHSC differ only by one bit: ; (X == C1 || X == C2) -> (X & ~(C1 ^ C2)) == C1 (C1 has 1 less set bit) ; PR14708: https://bugs.llvm.org/show_bug.cgi?id=14708 @@ -53,6 +87,18 @@ define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) { ret i1 %or } +define i1 @or_eq_with_one_bit_diff_constants1_logical(i32 %x) { +; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 50 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp eq i32 %x, 50 + %cmp2 = icmp eq i32 %x, 51 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + ; (X != C1 && X != C2) -> (X & ~(C1 ^ C2)) != C1 (C1 has 1 less set bit) define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) { @@ -67,6 +113,18 @@ define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) { ret i1 %and } +define i1 @and_ne_with_one_bit_diff_constants1_logical(i32 %x) { +; CHECK-LABEL: 
@and_ne_with_one_bit_diff_constants1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 50 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp ne i32 %x, 51 + %cmp2 = icmp ne i32 %x, 50 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + ; The constants are not necessarily off-by-one, just off-by-one-bit. define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) { @@ -81,6 +139,18 @@ define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) { ret i1 %or } +define i1 @or_eq_with_one_bit_diff_constants2_logical(i32 %x) { +; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -33 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 65 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp eq i32 %x, 97 + %cmp2 = icmp eq i32 %x, 65 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) { ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2( ; CHECK-NEXT: [[TMP1:%.*]] = and i19 [[X:%.*]], -129 @@ -93,6 +163,18 @@ define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) { ret i1 %and } +define i1 @and_ne_with_one_bit_diff_constants2_logical(i19 %x) { +; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i19 [[X:%.*]], -129 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 65 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp ne i19 %x, 65 + %cmp2 = icmp ne i19 %x, 193 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + ; Make sure the constants are treated as unsigned when comparing them. 
define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) { @@ -107,6 +189,18 @@ define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) { ret i1 %or } +define i1 @or_eq_with_one_bit_diff_constants3_logical(i8 %x) { +; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], 127 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 126 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp eq i8 %x, 254 + %cmp2 = icmp eq i8 %x, 126 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) { ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3( ; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], 127 @@ -119,6 +213,18 @@ define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) { ret i1 %and } +define i1 @and_ne_with_one_bit_diff_constants3_logical(i8 %x) { +; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], 127 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], 65 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp ne i8 %x, 65 + %cmp2 = icmp ne i8 %x, 193 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + ; Use an 'add' to eliminate an icmp if the constants are off-by-one (not off-by-one-bit). ; (X == 13 | X == 14) -> X-13 X-39 >u 1 define i1 @and_ne_with_diff_one(i32 %x) { @@ -148,6 +266,18 @@ define i1 @and_ne_with_diff_one(i32 %x) { ret i1 %and } +define i1 @and_ne_with_diff_one_logical(i32 %x) { +; CHECK-LABEL: @and_ne_with_diff_one_logical( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -39 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp ne i32 %x, 40 + %cmp2 = icmp ne i32 %x, 39 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + ; Make sure the constants are treated as signed when comparing them. 
; PR32524: https://bugs.llvm.org/show_bug.cgi?id=32524 @@ -163,6 +293,18 @@ define i1 @or_eq_with_diff_one_signed(i32 %x) { ret i1 %or } +define i1 @or_eq_with_diff_one_signed_logical(i32 %x) { +; CHECK-LABEL: @or_eq_with_diff_one_signed_logical( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 2 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp eq i32 %x, 0 + %cmp2 = icmp eq i32 %x, -1 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + define i1 @and_ne_with_diff_one_signed(i64 %x) { ; CHECK-LABEL: @and_ne_with_diff_one_signed( ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X:%.*]], 1 @@ -175,6 +317,18 @@ define i1 @and_ne_with_diff_one_signed(i64 %x) { ret i1 %and } +define i1 @and_ne_with_diff_one_signed_logical(i64 %x) { +; CHECK-LABEL: @and_ne_with_diff_one_signed_logical( +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp ne i64 %x, -1 + %cmp2 = icmp ne i64 %x, 0 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + ; Vectors with splat constants get the same folds. 
define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) { @@ -274,6 +428,17 @@ define i1 @PR42691_1(i32 %x) { ret i1 %c } +define i1 @PR42691_1_logical(i32 %x) { +; CHECK-LABEL: @PR42691_1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 2147483646 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp slt i32 %x, 0 + %c2 = icmp eq i32 %x, 2147483647 + %c = select i1 %c1, i1 true, i1 %c2 + ret i1 %c +} + define i1 @PR42691_2(i32 %x) { ; CHECK-LABEL: @PR42691_2( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -2 @@ -285,6 +450,17 @@ define i1 @PR42691_2(i32 %x) { ret i1 %c } +define i1 @PR42691_2_logical(i32 %x) { +; CHECK-LABEL: @PR42691_2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -2 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp ult i32 %x, 2147483648 + %c2 = icmp eq i32 %x, 4294967295 + %c = select i1 %c1, i1 true, i1 %c2 + ret i1 %c +} + define i1 @PR42691_3(i32 %x) { ; CHECK-LABEL: @PR42691_3( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], -2147483647 @@ -296,6 +472,17 @@ define i1 @PR42691_3(i32 %x) { ret i1 %c } +define i1 @PR42691_3_logical(i32 %x) { +; CHECK-LABEL: @PR42691_3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], -2147483647 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp sge i32 %x, 0 + %c2 = icmp eq i32 %x, -2147483648 + %c = select i1 %c1, i1 true, i1 %c2 + ret i1 %c +} + define i1 @PR42691_4(i32 %x) { ; CHECK-LABEL: @PR42691_4( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 1 @@ -307,6 +494,17 @@ define i1 @PR42691_4(i32 %x) { ret i1 %c } +define i1 @PR42691_4_logical(i32 %x) { +; CHECK-LABEL: @PR42691_4_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 1 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp uge i32 %x, 2147483648 + %c2 = icmp eq i32 %x, 0 + %c = select i1 %c1, i1 true, i1 %c2 + ret i1 %c +} + define i1 @PR42691_5(i32 %x) { ; CHECK-LABEL: @PR42691_5( ; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -1 @@ -319,6 +517,18 @@ define i1 
@PR42691_5(i32 %x) { ret i1 %c } +define i1 @PR42691_5_logical(i32 %x) { +; CHECK-LABEL: @PR42691_5_logical( +; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X_OFF]], 2147483645 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp slt i32 %x, 1 + %c2 = icmp eq i32 %x, 2147483647 + %c = select i1 %c1, i1 true, i1 %c2 + ret i1 %c +} + define i1 @PR42691_6(i32 %x) { ; CHECK-LABEL: @PR42691_6( ; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], 2147483647 @@ -331,6 +541,18 @@ define i1 @PR42691_6(i32 %x) { ret i1 %c } +define i1 @PR42691_6_logical(i32 %x) { +; CHECK-LABEL: @PR42691_6_logical( +; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X_OFF]], 2147483645 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp ult i32 %x, 2147483649 + %c2 = icmp eq i32 %x, 4294967295 + %c = select i1 %c1, i1 true, i1 %c2 + ret i1 %c +} + define i1 @PR42691_7(i32 %x) { ; CHECK-LABEL: @PR42691_7( ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 @@ -343,6 +565,18 @@ define i1 @PR42691_7(i32 %x) { ret i1 %c } +define i1 @PR42691_7_logical(i32 %x) { +; CHECK-LABEL: @PR42691_7_logical( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp uge i32 %x, 2147483649 + %c2 = icmp eq i32 %x, 0 + %c = select i1 %c1, i1 true, i1 %c2 + ret i1 %c +} + define i1 @PR42691_8(i32 %x) { ; CHECK-LABEL: @PR42691_8( ; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], 2147483647 @@ -355,6 +589,18 @@ define i1 @PR42691_8(i32 %x) { ret i1 %c } +define i1 @PR42691_8_logical(i32 %x) { +; CHECK-LABEL: @PR42691_8_logical( +; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], -2147483635 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp slt i32 %x, 14 + %c2 = icmp ne i32 %x, -2147483648 + %c = select i1 %c1, i1 %c2, i1 false + ret i1 %c +} + define i1 @PR42691_9(i32 %x) { 
; CHECK-LABEL: @PR42691_9( ; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -14 @@ -367,6 +613,18 @@ define i1 @PR42691_9(i32 %x) { ret i1 %c } +define i1 @PR42691_9_logical(i32 %x) { +; CHECK-LABEL: @PR42691_9_logical( +; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -14 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 2147483633 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp sgt i32 %x, 13 + %c2 = icmp ne i32 %x, 2147483647 + %c = select i1 %c1, i1 %c2, i1 false + ret i1 %c +} + define i1 @PR42691_10(i32 %x) { ; CHECK-LABEL: @PR42691_10( ; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -14 @@ -379,6 +637,18 @@ define i1 @PR42691_10(i32 %x) { ret i1 %c } +define i1 @PR42691_10_logical(i32 %x) { +; CHECK-LABEL: @PR42691_10_logical( +; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -14 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], -15 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %c1 = icmp ugt i32 %x, 13 + %c2 = icmp ne i32 %x, 4294967295 + %c = select i1 %c1, i1 %c2, i1 false + ret i1 %c +} + define i1 @substitute_constant_and_eq_eq(i8 %x, i8 %y) { ; CHECK-LABEL: @substitute_constant_and_eq_eq( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 @@ -392,6 +662,19 @@ define i1 @substitute_constant_and_eq_eq(i8 %x, i8 %y) { ret i1 %r } +define i1 @substitute_constant_and_eq_eq_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_and_eq_eq_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %x, 42 + %c2 = icmp eq i8 %x, %y + %r = select i1 %c1, i1 %c2, i1 false + ret i1 %r +} + define i1 @substitute_constant_and_eq_eq_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @substitute_constant_and_eq_eq_commute( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 @@ -405,6 +688,19 @@ define i1 @substitute_constant_and_eq_eq_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 
@substitute_constant_and_eq_eq_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_and_eq_eq_commute_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %x, 42 + %c2 = icmp eq i8 %x, %y + %r = select i1 %c2, i1 %c1, i1 false + ret i1 %r +} + define i1 @substitute_constant_and_eq_ugt_swap(i8 %x, i8 %y) { ; CHECK-LABEL: @substitute_constant_and_eq_ugt_swap( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 @@ -418,6 +714,19 @@ define i1 @substitute_constant_and_eq_ugt_swap(i8 %x, i8 %y) { ret i1 %r } +define i1 @substitute_constant_and_eq_ugt_swap_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_and_eq_ugt_swap_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %x, 42 + %c2 = icmp ugt i8 %y, %x + %r = select i1 %c2, i1 %c1, i1 false + ret i1 %r +} + define <2 x i1> @substitute_constant_and_eq_ne_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @substitute_constant_and_eq_ne_vec( ; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i8> [[X:%.*]], @@ -446,6 +755,21 @@ define i1 @substitute_constant_and_eq_sgt_use(i8 %x, i8 %y) { ret i1 %r } +define i1 @substitute_constant_and_eq_sgt_use_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_and_eq_sgt_use_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 +; CHECK-NEXT: call void @use(i1 [[C1]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %x, 42 + call void @use(i1 %c1) + %c2 = icmp sgt i8 %x, %y + %r = select i1 %c2, i1 %c1, i1 false + ret i1 %r +} + ; Negative test - extra use define i1 @substitute_constant_and_eq_sgt_use2(i8 
%x, i8 %y) { @@ -463,6 +787,21 @@ define i1 @substitute_constant_and_eq_sgt_use2(i8 %x, i8 %y) { ret i1 %r } +define i1 @substitute_constant_and_eq_sgt_use2_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_and_eq_sgt_use2_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: [[R:%.*]] = and i1 [[C2]], [[C1]] +; CHECK-NEXT: ret i1 [[R]] +; + %c1 = icmp eq i8 %x, 42 + %c2 = icmp sgt i8 %x, %y + call void @use(i1 %c2) + %r = select i1 %c2, i1 %c1, i1 false + ret i1 %r +} + ; Extra use does not prevent transform if the expression simplifies: ; X == MAX && X < Y --> false @@ -479,6 +818,19 @@ define i1 @slt_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @slt_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_and_max_logical( +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: ret i1 false +; + %c1 = icmp eq i8 %x, 127 + %c2 = icmp slt i8 %x, %y + call void @use(i1 %c2) + %r = select i1 %c2, i1 %c1, i1 false + ret i1 %r +} + ; Extra use does not prevent transform if the expression simplifies: ; X == MAX && X >= Y --> X == MAX @@ -496,6 +848,20 @@ define i1 @sge_and_max(i8 %x, i8 %y) { ret i1 %r } +define i1 @sge_and_max_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @sge_and_max_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[C2:%.*]] = icmp sge i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: ret i1 [[C1]] +; + %c1 = icmp eq i8 %x, 127 + %c2 = icmp sge i8 %x, %y + call void @use(i1 %c2) + %r = select i1 %c2, i1 %c1, i1 false + ret i1 %r +} + define i1 @substitute_constant_and_ne_ugt_swap(i8 %x, i8 %y) { ; CHECK-LABEL: @substitute_constant_and_ne_ugt_swap( ; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 @@ -509,6 +875,19 @@ define i1 @substitute_constant_and_ne_ugt_swap(i8 %x, i8 %y) { ret i1 %r } +define i1 
@substitute_constant_and_ne_ugt_swap_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_and_ne_ugt_swap_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i8 [[Y:%.*]], [[X]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[C2]], [[C1]] +; CHECK-NEXT: ret i1 [[R]] +; + %c1 = icmp ne i8 %x, 42 + %c2 = icmp ugt i8 %y, %x + %r = select i1 %c2, i1 %c1, i1 false + ret i1 %r +} + define i1 @substitute_constant_or_ne_swap_sle(i8 %x, i8 %y) { ; CHECK-LABEL: @substitute_constant_or_ne_swap_sle( ; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 @@ -522,6 +901,19 @@ define i1 @substitute_constant_or_ne_swap_sle(i8 %x, i8 %y) { ret i1 %r } +define i1 @substitute_constant_or_ne_swap_sle_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_or_ne_swap_sle_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], 43 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp ne i8 %x, 42 + %c2 = icmp sle i8 %y, %x + %r = select i1 %c1, i1 true, i1 %c2 + ret i1 %r +} + define i1 @substitute_constant_or_ne_uge_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @substitute_constant_or_ne_uge_commute( ; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 @@ -535,6 +927,19 @@ define i1 @substitute_constant_or_ne_uge_commute(i8 %x, i8 %y) { ret i1 %r } +define i1 @substitute_constant_or_ne_uge_commute_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_or_ne_uge_commute_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Y:%.*]], 43 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp ne i8 %x, 42 + %c2 = icmp uge i8 %x, %y + %r = select i1 %c2, i1 true, i1 %c1 + ret i1 %r +} + ; Negative test - not safe to substitute vector constant with undef element define <2 x i1> @substitute_constant_or_ne_slt_swap_vec(<2 x i8> %x, <2 x i8> %y) { @@ 
-563,6 +968,19 @@ define i1 @substitute_constant_or_eq_swap_ne(i8 %x, i8 %y) { ret i1 %r } +define i1 @substitute_constant_or_eq_swap_ne_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_or_eq_swap_ne_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[Y:%.*]], [[X]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[C1]], [[C2]] +; CHECK-NEXT: ret i1 [[R]] +; + %c1 = icmp eq i8 %x, 42 + %c2 = icmp ne i8 %y, %x + %r = select i1 %c1, i1 true, i1 %c2 + ret i1 %r +} + define i1 @substitute_constant_or_ne_sge_use(i8 %x, i8 %y) { ; CHECK-LABEL: @substitute_constant_or_ne_sge_use( ; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 @@ -578,6 +996,21 @@ define i1 @substitute_constant_or_ne_sge_use(i8 %x, i8 %y) { ret i1 %r } +define i1 @substitute_constant_or_ne_sge_use_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_or_ne_sge_use_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 +; CHECK-NEXT: call void @use(i1 [[C1]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], 43 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp ne i8 %x, 42 + call void @use(i1 %c1) + %c2 = icmp sge i8 %x, %y + %r = select i1 %c2, i1 true, i1 %c1 + ret i1 %r +} + ; Negative test - extra use define i1 @substitute_constant_or_ne_ule_use2(i8 %x, i8 %y) { @@ -594,3 +1027,18 @@ define i1 @substitute_constant_or_ne_ule_use2(i8 %x, i8 %y) { %r = or i1 %c2, %c1 ret i1 %r } + +define i1 @substitute_constant_or_ne_ule_use2_logical(i8 %x, i8 %y) { +; CHECK-LABEL: @substitute_constant_or_ne_ule_use2_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[X:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp ule i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: [[R:%.*]] = or i1 [[C2]], [[C1]] +; CHECK-NEXT: ret i1 [[R]] +; + %c1 = icmp ne i8 %x, 42 + %c2 = icmp ule i8 %x, %y + call void @use(i1 %c2) + %r = select i1 %c2, i1 true, i1 %c1 + ret i1 %r +} diff --git 
a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index 020dbc483d9de5..669cba88fabaa5 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -30,6 +30,14 @@ define i1 @test3(i1 %A) { ret i1 %B } +define i1 @test3_logical(i1 %A) { +; CHECK-LABEL: @test3_logical( +; CHECK-NEXT: ret i1 false +; + %B = select i1 %A, i1 false, i1 false + ret i1 %B +} + define i1 @test4(i1 %A) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: ret i1 [[A:%.*]] @@ -38,6 +46,14 @@ define i1 @test4(i1 %A) { ret i1 %B } +define i1 @test4_logical(i1 %A) { +; CHECK-LABEL: @test4_logical( +; CHECK-NEXT: ret i1 [[A:%.*]] +; + %B = select i1 %A, i1 true, i1 false + ret i1 %B +} + define i32 @test5(i32 %A) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: ret i32 [[A:%.*]] @@ -54,6 +70,14 @@ define i1 @test6(i1 %A) { ret i1 %B } +define i1 @test6_logical(i1 %A) { +; CHECK-LABEL: @test6_logical( +; CHECK-NEXT: ret i1 [[A:%.*]] +; + %B = select i1 %A, i1 %A, i1 false + ret i1 %B +} + ; A & ~A == 0 define i32 @test7(i32 %A) { ; CHECK-LABEL: @test7( @@ -135,6 +159,18 @@ define i1 @test12(i32 %A, i32 %B) { ret i1 %D } +define i1 @test12_logical(i32 %A, i32 %B) { +; CHECK-LABEL: @test12_logical( +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[C1]] +; + %C1 = icmp ult i32 %A, %B + %C2 = icmp ule i32 %A, %B + ; (A < B) & (A <= B) === (A < B) + %D = select i1 %C1, i1 %C2, i1 false + ret i1 %D +} + define i1 @test13(i32 %A, i32 %B) { ; CHECK-LABEL: @test13( ; CHECK-NEXT: ret i1 false @@ -146,6 +182,17 @@ define i1 @test13(i32 %A, i32 %B) { ret i1 %D } +define i1 @test13_logical(i32 %A, i32 %B) { +; CHECK-LABEL: @test13_logical( +; CHECK-NEXT: ret i1 false +; + %C1 = icmp ult i32 %A, %B + %C2 = icmp ugt i32 %A, %B + ; (A < B) & (A > B) === false + %D = select i1 %C1, i1 %C2, i1 false + ret i1 %D +} + define i1 @test14(i8 %A) { ; CHECK-LABEL: @test14( ; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[A:%.*]], 0 @@ -249,6 
+296,17 @@ define i1 @test23(i32 %A) { ret i1 %D } +define i1 @test23_logical(i32 %A) { +; CHECK-LABEL: @test23_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %B = icmp sgt i32 %A, 1 + %C = icmp sle i32 %A, 2 + %D = select i1 %B, i1 %C, i1 false + ret i1 %D +} + ; FIXME: Vectors should fold too. define <2 x i1> @test23vec(<2 x i32> %A) { ; CHECK-LABEL: @test23vec( @@ -275,6 +333,18 @@ define i1 @test24(i32 %A) { ret i1 %D } +define i1 @test24_logical(i32 %A) { +; CHECK-LABEL: @test24_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %B = icmp sgt i32 %A, 1 + %C = icmp ne i32 %A, 2 + ;; A > 2 + %D = select i1 %B, i1 %C, i1 false + ret i1 %D +} + define i1 @test25(i32 %A) { ; CHECK-LABEL: @test25( ; CHECK-NEXT: [[A_OFF:%.*]] = add i32 [[A:%.*]], -50 @@ -287,6 +357,18 @@ define i1 @test25(i32 %A) { ret i1 %D } +define i1 @test25_logical(i32 %A) { +; CHECK-LABEL: @test25_logical( +; CHECK-NEXT: [[A_OFF:%.*]] = add i32 [[A:%.*]], -50 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A_OFF]], 50 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %B = icmp sge i32 %A, 50 + %C = icmp slt i32 %A, 100 + %D = select i1 %B, i1 %C, i1 false + ret i1 %D +} + ; FIXME: Vectors should fold too. 
define <2 x i1> @test25vec(<2 x i32> %A) { ; CHECK-LABEL: @test25vec( @@ -758,6 +840,21 @@ define i1 @and_orn_cmp_1(i32 %a, i32 %b, i32 %c) { ret i1 %and } +define i1 @and_orn_cmp_1_logical(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_orn_cmp_1_logical( +; CHECK-NEXT: [[X:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[AND]] +; + %x = icmp sgt i32 %a, %b + %x_inv = icmp sle i32 %a, %b + %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering + %or = select i1 %y, i1 true, i1 %x_inv + %and = select i1 %x, i1 %or, i1 false + ret i1 %and +} + ; Commute the 'and': ; ((Y | ~X) & X) -> (X & Y), where 'not' is an inverted cmp @@ -794,6 +891,21 @@ define i1 @and_orn_cmp_3(i72 %a, i72 %b, i72 %c) { ret i1 %and } +define i1 @and_orn_cmp_3_logical(i72 %a, i72 %b, i72 %c) { +; CHECK-LABEL: @and_orn_cmp_3_logical( +; CHECK-NEXT: [[X:%.*]] = icmp ugt i72 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i72 [[C:%.*]], 42 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[AND]] +; + %x = icmp ugt i72 %a, %b + %x_inv = icmp ule i72 %a, %b + %y = icmp ugt i72 %c, 42 ; thwart complexity-based ordering + %or = select i1 %x_inv, i1 true, i1 %y + %and = select i1 %x, i1 %or, i1 false + ret i1 %and +} + ; Commute the 'and': ; ((~X | Y) & X) -> (X & Y), where 'not' is an inverted cmp @@ -830,6 +942,21 @@ define i1 @andn_or_cmp_1(i37 %a, i37 %b, i37 %c) { ret i1 %and } +define i1 @andn_or_cmp_1_logical(i37 %a, i37 %b, i37 %c) { +; CHECK-LABEL: @andn_or_cmp_1_logical( +; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i37 [[C:%.*]], 42 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[X_INV]], [[Y]] +; CHECK-NEXT: ret i1 [[AND]] +; + %x = icmp sgt i37 %a, %b + %x_inv = icmp sle i37 %a, %b + %y = icmp ugt i37 %c, 42 ; thwart complexity-based ordering + %or = select i1 %y, i1 true, i1 %x + %and = 
select i1 %x_inv, i1 %or, i1 false + ret i1 %and +} + ; Commute the 'and': ; ((Y | X) & ~X) -> (~X & Y), where 'not' is an inverted cmp @@ -848,6 +975,21 @@ define i1 @andn_or_cmp_2(i16 %a, i16 %b, i16 %c) { ret i1 %and } +define i1 @andn_or_cmp_2_logical(i16 %a, i16 %b, i16 %c) { +; CHECK-LABEL: @andn_or_cmp_2_logical( +; CHECK-NEXT: [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i16 [[C:%.*]], 42 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[Y]], [[X_INV]] +; CHECK-NEXT: ret i1 [[AND]] +; + %x = icmp sge i16 %a, %b + %x_inv = icmp slt i16 %a, %b + %y = icmp ugt i16 %c, 42 ; thwart complexity-based ordering + %or = select i1 %y, i1 true, i1 %x + %and = select i1 %or, i1 %x_inv, i1 false + ret i1 %and +} + ; Commute the 'or': ; (~X & (X | Y)) -> (~X & Y), where 'not' is an inverted cmp @@ -884,6 +1026,21 @@ define i1 @andn_or_cmp_4(i32 %a, i32 %b, i32 %c) { ret i1 %and } +define i1 @andn_or_cmp_4_logical(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @andn_or_cmp_4_logical( +; CHECK-NEXT: [[X_INV:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[Y]], [[X_INV]] +; CHECK-NEXT: ret i1 [[AND]] +; + %x = icmp eq i32 %a, %b + %x_inv = icmp ne i32 %a, %b + %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering + %or = select i1 %x, i1 true, i1 %y + %and = select i1 %or, i1 %x_inv, i1 false + ret i1 %and +} + define i32 @lowbitmask_casted_shift(i8 %x) { ; CHECK-LABEL: @lowbitmask_casted_shift( ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 diff --git a/llvm/test/Transforms/InstCombine/and2.ll b/llvm/test/Transforms/InstCombine/and2.ll index 47b0d2d6245e4e..6b12e26ab5f377 100644 --- a/llvm/test/Transforms/InstCombine/and2.ll +++ b/llvm/test/Transforms/InstCombine/and2.ll @@ -11,6 +11,16 @@ define i1 @test2(i1 %X, i1 %Y) { ret i1 %b } +define i1 @test2_logical(i1 %X, i1 %Y) { +; CHECK-LABEL: @test2_logical( +; CHECK-NEXT: [[A:%.*]] = and i1 [[X:%.*]], 
[[Y:%.*]] +; CHECK-NEXT: ret i1 [[A]] +; + %a = select i1 %X, i1 %Y, i1 false + %b = select i1 %a, i1 %X, i1 false + ret i1 %b +} + define i32 @test3(i32 %X, i32 %Y) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] @@ -34,6 +44,19 @@ define i1 @test7(i32 %i, i1 %b) { ret i1 %and2 } +define i1 @test7_logical(i32 %i, i1 %b) { +; CHECK-LABEL: @test7_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[I:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp slt i32 %i, 1 + %cmp2 = icmp sgt i32 %i, -1 + %and1 = select i1 %cmp1, i1 %b, i1 false + %and2 = select i1 %and1, i1 %cmp2, i1 false + ret i1 %and2 +} + define i1 @test8(i32 %i) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: [[I_OFF:%.*]] = add i32 [[I:%.*]], -1 @@ -46,6 +69,18 @@ define i1 @test8(i32 %i) { ret i1 %cond } +define i1 @test8_logical(i32 %i) { +; CHECK-LABEL: @test8_logical( +; CHECK-NEXT: [[I_OFF:%.*]] = add i32 [[I:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[I_OFF]], 13 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp1 = icmp ne i32 %i, 0 + %cmp2 = icmp ult i32 %i, 14 + %cond = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %cond +} + ; FIXME: Vectors should fold too. 
define <2 x i1> @test8vec(<2 x i32> %i) { ; CHECK-LABEL: @test8vec( diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index a988eea894450e..f46ffbec2ce669 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -69,6 +69,19 @@ define i32 @can1(i1 %a, i1 %b, i1 %c) { ret i32 5 } +define i32 @can1_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @can1_logical( +; CHECK-NEXT: call void @llvm.assume(i1 [[A:%.*]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[B:%.*]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[C:%.*]]) +; CHECK-NEXT: ret i32 5 +; + %and1 = select i1 %a, i1 %b, i1 false + %and = select i1 %and1, i1 %c, i1 false + tail call void @llvm.assume(i1 %and) + ret i32 5 +} + define i32 @can2(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @can2( ; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[A:%.*]], true @@ -83,6 +96,20 @@ define i32 @can2(i1 %a, i1 %b, i1 %c) { ret i32 5 } +define i32 @can2_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @can2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[A:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[B:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]]) +; CHECK-NEXT: ret i32 5 +; + %v = select i1 %a, i1 true, i1 %b + %w = xor i1 %v, 1 + tail call void @llvm.assume(i1 %w) + ret i32 5 +} + define i32 @bar1(i32 %a) #0 { ; CHECK-LABEL: @bar1( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 @@ -595,6 +622,43 @@ exit: unreachable } +define i32 @unreachable_assume_logical(i32 %x, i32 %y) { +; CHECK-LABEL: @unreachable_assume_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] 
+; CHECK: if: +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], -2 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A]], 104 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: unreachable +; +entry: + %cmp0 = icmp sgt i32 %x, 1 + %cmp1 = icmp eq i32 %y, 1 + %or = select i1 %cmp0, i1 true, i1 %cmp1 + tail call void @llvm.assume(i1 %or) + %cmp2 = icmp eq i32 %x, 1 + br i1 %cmp2, label %if, label %exit + +if: + %a = and i32 %y, -2 + %cmp3 = icmp ne i32 %a, 104 + tail call void @llvm.assume(i1 %cmp3) + br label %exit + +exit: + %cmp4 = icmp eq i32 %x, 2 + tail call void @llvm.assume(i1 %cmp4) + unreachable +} + define i32 @unreachable_assumes_and_store(i32 %x, i32 %y, i32* %p) { ; CHECK-LABEL: @unreachable_assumes_and_store( ; CHECK-NEXT: entry: @@ -635,6 +699,46 @@ exit: unreachable } +define i32 @unreachable_assumes_and_store_logical(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @unreachable_assumes_and_store_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], -2 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A]], 104 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: unreachable +; +entry: + %cmp0 = icmp sgt i32 %x, 1 + %cmp1 = icmp eq i32 %y, 1 + %or = select i1 %cmp0, i1 true, i1 %cmp1 + tail call void @llvm.assume(i1 %or) + %cmp2 = icmp eq i32 %x, 1 + br i1 %cmp2, label %if, label %exit + +if: + %a = and i32 %y, -2 + %cmp3 = icmp ne i32 %a, 104 + tail call void @llvm.assume(i1 %cmp3) + br label %exit + +exit: + %cmp4 = icmp eq i32 %x, 2 + tail call void @llvm.assume(i1 %cmp4) + 
%cmp5 = icmp ugt i32 %y, 42 + tail call void @llvm.assume(i1 %cmp5) + store i32 %x, i32* %p + unreachable +} + declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} diff --git a/llvm/test/Transforms/InstCombine/bit-checks.ll b/llvm/test/Transforms/InstCombine/bit-checks.ll index 1ecd305e807d90..28464c41ad499e 100644 --- a/llvm/test/Transforms/InstCombine/bit-checks.ll +++ b/llvm/test/Transforms/InstCombine/bit-checks.ll @@ -3,7 +3,7 @@ define i32 @main1(i32 %argc) { ; CHECK-LABEL: @main1( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 3 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] @@ -17,11 +17,27 @@ define i32 @main1(i32 %argc) { ret i32 %retval.0 } +define i32 @main1_logical(i32 %argc) { +; CHECK-LABEL: @main1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %and = and i32 %argc, 1 + %tobool = icmp ne i32 %and, 0 + %and2 = and i32 %argc, 2 + %tobool3 = icmp ne i32 %and2, 0 + %or.cond = select i1 %tobool, i1 %tobool3, i1 false + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} + define i32 @main2(i32 %argc) { ; CHECK-LABEL: @main2( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 3 -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 3 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 3 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 1 @@ -33,6 +49,22 @@ define i32 @main2(i32 %argc) { ret i32 %storemerge } +define i32 @main2_logical(i32 %argc) { +; CHECK-LABEL: @main2_logical( +; CHECK-NEXT: [[TMP1:%.*]] 
= and i32 [[ARGC:%.*]], 3 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 1 + %tobool = icmp eq i32 %and, 0 + %and2 = and i32 %argc, 2 + %tobool3 = icmp eq i32 %and2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; tests to check combining (icmp eq (A & B), C) & (icmp eq (A & D), E) ; tests to check if (icmp eq (A & B), 0) is treated like (icmp eq (A & B), B) ; if B is a single bit constant @@ -40,9 +72,9 @@ define i32 @main2(i32 %argc) { ; (icmp eq (A & B), 0) & (icmp eq (A & D), 0) -> (icmp eq (A & (B|D)), 0) define i32 @main3(i32 %argc) { ; CHECK-LABEL: @main3( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55 -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -54,11 +86,27 @@ define i32 @main3(i32 %argc) { ret i32 %storemerge } +define i32 @main3_logical(i32 %argc) { +; CHECK-LABEL: @main3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp eq i32 %and, 0 + %and2 = and i32 %argc, 48 + %tobool3 = icmp eq i32 %and2, 0 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main3b(i32 %argc) { ; CHECK-LABEL: @main3b( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23 -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 
[[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -70,12 +118,28 @@ define i32 @main3b(i32 %argc) { ret i32 %storemerge } +define i32 @main3b_logical(i32 %argc) { +; CHECK-LABEL: @main3b_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp eq i32 %and, 0 + %and2 = and i32 %argc, 16 + %tobool3 = icmp ne i32 %and2, 16 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main3e_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main3e_like( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -87,12 +151,29 @@ define i32 @main3e_like(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main3e_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main3e_like_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 
%argc2 + %tobool = icmp eq i32 %and, 0 + %and2 = and i32 %argc, %argc3 + %tobool3 = icmp eq i32 %and2, 0 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (icmp ne (A & B), 0) | (icmp ne (A & D), 0) -> (icmp ne (A & (B|D)), 0) define i32 @main3c(i32 %argc) { ; CHECK-LABEL: @main3c( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55 -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -104,11 +185,27 @@ define i32 @main3c(i32 %argc) { ret i32 %storemerge } +define i32 @main3c_logical(i32 %argc) { +; CHECK-LABEL: @main3c_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp ne i32 %and, 0 + %and2 = and i32 %argc, 48 + %tobool3 = icmp ne i32 %and2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main3d(i32 %argc) { ; CHECK-LABEL: @main3d( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23 -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -120,12 +217,28 @@ define i32 @main3d(i32 %argc) { ret i32 %storemerge } +define i32 @main3d_logical(i32 %argc) { +; CHECK-LABEL: @main3d_logical( +; CHECK-NEXT: 
[[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp ne i32 %and, 0 + %and2 = and i32 %argc, 16 + %tobool3 = icmp eq i32 %and2, 16 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main3f_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main3f_like( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -137,12 +250,29 @@ define i32 @main3f_like(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main3f_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main3f_like_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, %argc2 + %tobool = icmp ne i32 %and, 0 + %and2 = and i32 %argc, %argc3 + %tobool3 = icmp ne i32 %and2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (icmp eq (A & B), B) & (icmp eq (A & D), D) -> (icmp eq (A & (B|D)), (B|D)) define i32 @main4(i32 %argc) { ; CHECK-LABEL: @main4( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 
%argc, 55 -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 55 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 55 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -154,11 +284,27 @@ define i32 @main4(i32 %argc) { ret i32 %storemerge } +define i32 @main4_logical(i32 %argc) { +; CHECK-LABEL: @main4_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 55 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp eq i32 %and, 7 + %and2 = and i32 %argc, 48 + %tobool3 = icmp eq i32 %and2, 48 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main4b(i32 %argc) { ; CHECK-LABEL: @main4b( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23 -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 23 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 23 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -170,12 +316,28 @@ define i32 @main4b(i32 %argc) { ret i32 %storemerge } +define i32 @main4b_logical(i32 %argc) { +; CHECK-LABEL: @main4b_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 23 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp eq i32 %and, 7 + %and2 = and i32 %argc, 16 + %tobool3 = icmp ne i32 %and2, 0 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 
0, i32 1 + ret i32 %storemerge +} + define i32 @main4e_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main4e_like( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -187,12 +349,29 @@ define i32 @main4e_like(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main4e_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main4e_like_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, %argc2 + %tobool = icmp eq i32 %and, %argc2 + %and2 = and i32 %argc, %argc3 + %tobool3 = icmp eq i32 %and2, %argc3 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (icmp ne (A & B), B) | (icmp ne (A & D), D) -> (icmp ne (A & (B|D)), (B|D)) define i32 @main4c(i32 %argc) { ; CHECK-LABEL: @main4c( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55 -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 55 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 55 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -204,11 +383,27 @@ 
define i32 @main4c(i32 %argc) { ret i32 %storemerge } +define i32 @main4c_logical(i32 %argc) { +; CHECK-LABEL: @main4c_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 55 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp ne i32 %and, 7 + %and2 = and i32 %argc, 48 + %tobool3 = icmp ne i32 %and2, 48 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main4d(i32 %argc) { ; CHECK-LABEL: @main4d( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23 -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 23 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 23 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -220,12 +415,28 @@ define i32 @main4d(i32 %argc) { ret i32 %storemerge } +define i32 @main4d_logical(i32 %argc) { +; CHECK-LABEL: @main4d_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 23 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp ne i32 %and, 7 + %and2 = and i32 %argc, 16 + %tobool3 = icmp eq i32 %and2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main4f_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main4f_like( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: 
[[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -237,13 +448,30 @@ define i32 @main4f_like(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main4f_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main4f_like_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, %argc2 + %tobool = icmp ne i32 %and, %argc2 + %and2 = and i32 %argc, %argc3 + %tobool3 = icmp ne i32 %and2, %argc3 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (icmp eq (A & B), A) & (icmp eq (A & D), A) -> (icmp eq (A & (B&D)), A) define i32 @main5_like(i32 %argc, i32 %argc2) { ; CHECK-LABEL: @main5_like( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, %argc2 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], [[ARGC2:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 7 -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], 7 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 7 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -255,12 +483,29 @@ define i32 @main5_like(i32 %argc, i32 %argc2) { ret i32 %storemerge } +define i32 @main5_like_logical(i32 %argc, i32 %argc2) { +; CHECK-LABEL: @main5_like_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], [[ARGC2:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 7 +; 
CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 7 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp eq i32 %and, 7 + %and2 = and i32 %argc2, 7 + %tobool3 = icmp eq i32 %and2, 7 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main5e_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main5e_like( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], %argc -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[ARGC]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -272,13 +517,30 @@ define i32 @main5e_like(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main5e_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main5e_like_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[ARGC]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, %argc2 + %tobool = icmp eq i32 %and, %argc + %and2 = and i32 %argc, %argc3 + %tobool3 = icmp eq i32 %and2, %argc + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (icmp ne (A & B), A) | (icmp ne (A & D), A) -> (icmp ne (A & (B&D)), A) define i32 @main5c_like(i32 %argc, i32 %argc2) { ; CHECK-LABEL: @main5c_like( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 
%argc, %argc2 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], [[ARGC2:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 7 -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP2]], 7 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -290,12 +552,29 @@ define i32 @main5c_like(i32 %argc, i32 %argc2) { ret i32 %storemerge } +define i32 @main5c_like_logical(i32 %argc, i32 %argc2) { +; CHECK-LABEL: @main5c_like_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], [[ARGC2:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 7 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp ne i32 %and, 7 + %and2 = and i32 %argc2, 7 + %tobool3 = icmp ne i32 %and2, 7 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main5f_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main5f_like( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP2]], %argc -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], [[ARGC]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -307,13 +586,30 @@ define i32 @main5f_like(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main5f_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main5f_like_logical( +; 
CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], [[ARGC]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, %argc2 + %tobool = icmp ne i32 %and, %argc + %and2 = and i32 %argc, %argc3 + %tobool3 = icmp ne i32 %and2, %argc + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (icmp eq (A & B), C) & (icmp eq (A & D), E) -> (icmp eq (A & (B|D)), (C|E)) ; if B, C, D, E are constant, and it's possible define i32 @main6(i32 %argc) { ; CHECK-LABEL: @main6( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55 -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -325,11 +621,27 @@ define i32 @main6(i32 %argc) { ret i32 %storemerge } +define i32 @main6_logical(i32 %argc) { +; CHECK-LABEL: @main6_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp eq i32 %and, 3 + %and2 = and i32 %argc, 48 + %tobool3 = icmp eq i32 %and2, 16 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main6b(i32 %argc) { ; CHECK-LABEL: @main6b( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23 -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and 
i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -341,13 +653,29 @@ define i32 @main6b(i32 %argc) { ret i32 %storemerge } +define i32 @main6b_logical(i32 %argc) { +; CHECK-LABEL: @main6b_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp eq i32 %and, 3 + %and2 = and i32 %argc, 16 + %tobool3 = icmp ne i32 %and2, 0 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (icmp ne (A & B), C) | (icmp ne (A & D), E) -> (icmp ne (A & (B|D)), (C|E)) ; if B, C, D, E are constant, and it's possible define i32 @main6c(i32 %argc) { ; CHECK-LABEL: @main6c( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55 -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -359,11 +687,27 @@ define i32 @main6c(i32 %argc) { ret i32 %storemerge } +define i32 @main6c_logical(i32 %argc) { +; CHECK-LABEL: @main6c_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp ne i32 %and, 3 + %and2 = and i32 %argc, 48 + %tobool3 = icmp ne i32 %and2, 16 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 
%storemerge +} + define i32 @main6d(i32 %argc) { ; CHECK-LABEL: @main6d( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23 -; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -375,14 +719,30 @@ define i32 @main6d(i32 %argc) { ret i32 %storemerge } +define i32 @main6d_logical(i32 %argc) { +; CHECK-LABEL: @main6d_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and = and i32 %argc, 7 + %tobool = icmp ne i32 %and, 3 + %and2 = and i32 %argc, 16 + %tobool3 = icmp eq i32 %and2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %storemerge = select i1 %or.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; test parameter permutations ; (B & A) == B & (D & A) == D define i32 @main7a(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main7a( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc2, %argc @@ -394,13 +754,30 @@ define i32 @main7a(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main7a_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main7a_logical( +; CHECK-NEXT: [[TMP1:%.*]] = 
or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and1 = and i32 %argc2, %argc + %tobool = icmp eq i32 %and1, %argc2 + %and2 = and i32 %argc3, %argc + %tobool3 = icmp eq i32 %and2, %argc3 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; B == (A & B) & D == (A & D) define i32 @main7b(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main7b( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc, %argc2 @@ -412,13 +789,30 @@ define i32 @main7b(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main7b_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main7b_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and1 = and i32 %argc, %argc2 + %tobool = icmp eq i32 %argc2, %and1 + %and2 = and i32 %argc, %argc3 + %tobool3 = icmp eq i32 %argc3, %and2 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; B == (B & A) & D == (D & A) define i32 @main7c(i32 
%argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main7c( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc2, %argc @@ -430,15 +824,32 @@ define i32 @main7c(i32 %argc, i32 %argc2, i32 %argc3) { ret i32 %storemerge } +define i32 @main7c_logical(i32 %argc, i32 %argc2, i32 %argc3) { +; CHECK-LABEL: @main7c_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %and1 = and i32 %argc2, %argc + %tobool = icmp eq i32 %argc2, %and1 + %and2 = and i32 %argc3, %argc + %tobool3 = icmp eq i32 %argc3, %and2 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (A & (B & C)) == (B & C) & (A & (D & E)) == (D & E) define i32 @main7d(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ; CHECK-LABEL: @main7d( -; CHECK-NEXT: [[BC:%.*]] = and i32 %argc2, %argc4 -; CHECK-NEXT: [[DE:%.*]] = and i32 %argc3, %argc5 +; CHECK-NEXT: [[BC:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC4:%.*]] +; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: 
[[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -452,15 +863,36 @@ define i32 @main7d(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ret i32 %storemerge } +define i32 @main7d_logical(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { +; CHECK-LABEL: @main7d_logical( +; CHECK-NEXT: [[BC:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC4:%.*]] +; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %bc = and i32 %argc2, %argc4 + %de = and i32 %argc3, %argc5 + %and1 = and i32 %argc, %bc + %tobool = icmp eq i32 %and1, %bc + %and2 = and i32 %argc, %de + %tobool3 = icmp eq i32 %and2, %de + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; ((B & C) & A) == (B & C) & ((D & E) & A) == (D & E) define i32 @main7e(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ; CHECK-LABEL: @main7e( -; CHECK-NEXT: [[BC:%.*]] = and i32 %argc2, %argc4 -; CHECK-NEXT: [[DE:%.*]] = and i32 %argc3, %argc5 +; CHECK-NEXT: [[BC:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC4:%.*]] +; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] 
to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -474,15 +906,36 @@ define i32 @main7e(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ret i32 %storemerge } +define i32 @main7e_logical(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { +; CHECK-LABEL: @main7e_logical( +; CHECK-NEXT: [[BC:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC4:%.*]] +; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %bc = and i32 %argc2, %argc4 + %de = and i32 %argc3, %argc5 + %and1 = and i32 %bc, %argc + %tobool = icmp eq i32 %and1, %bc + %and2 = and i32 %de, %argc + %tobool3 = icmp eq i32 %and2, %de + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (B & C) == (A & (B & C)) & (D & E) == (A & (D & E)) define i32 @main7f(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ; CHECK-LABEL: @main7f( -; CHECK-NEXT: [[BC:%.*]] = and i32 %argc2, %argc4 -; CHECK-NEXT: [[DE:%.*]] = and i32 %argc3, %argc5 +; CHECK-NEXT: [[BC:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC4:%.*]] +; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -496,15 +949,36 @@ define i32 @main7f(i32 %argc, i32 %argc2, i32 %argc3, i32 
%argc4, i32 %argc5) { ret i32 %storemerge } +define i32 @main7f_logical(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { +; CHECK-LABEL: @main7f_logical( +; CHECK-NEXT: [[BC:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC4:%.*]] +; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %bc = and i32 %argc2, %argc4 + %de = and i32 %argc3, %argc5 + %and1 = and i32 %argc, %bc + %tobool = icmp eq i32 %bc, %and1 + %and2 = and i32 %argc, %de + %tobool3 = icmp eq i32 %de, %and2 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + ; (B & C) == ((B & C) & A) & (D & E) == ((D & E) & A) define i32 @main7g(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ; CHECK-LABEL: @main7g( -; CHECK-NEXT: [[BC:%.*]] = and i32 %argc2, %argc4 -; CHECK-NEXT: [[DE:%.*]] = and i32 %argc3, %argc5 +; CHECK-NEXT: [[BC:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC4:%.*]] +; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc -; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -518,11 +992,32 @@ define i32 @main7g(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ret i32 %storemerge } +define i32 @main7g_logical(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { +; CHECK-LABEL: 
@main7g_logical( +; CHECK-NEXT: [[BC:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC4:%.*]] +; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[STOREMERGE]] +; + %bc = and i32 %argc2, %argc4 + %de = and i32 %argc3, %argc5 + %and1 = and i32 %bc, %argc + %tobool = icmp eq i32 %bc, %and1 + %and2 = and i32 %de, %argc + %tobool3 = icmp eq i32 %de, %and2 + %and.cond = select i1 %tobool, i1 %tobool3, i1 false + %storemerge = select i1 %and.cond, i32 0, i32 1 + ret i32 %storemerge +} + define i32 @main8(i32 %argc) { ; CHECK-LABEL: @main8( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -534,6 +1029,22 @@ define i32 @main8(i32 %argc) { ret i32 %retval.0 } +define i32 @main8_logical(i32 %argc) { +; CHECK-LABEL: @main8_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %and = and i32 %argc, 64 + %tobool = icmp ne i32 %and, 0 + %trunc2 = trunc i32 %argc to i8 + %tobool3 = icmp slt i8 %trunc2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} + define i32 @main9(i32 %argc) { ; CHECK-LABEL: @main9( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 @@ -550,6 +1061,22 @@ define i32 @main9(i32 %argc) { ret i32 %retval.0 } +define i32 @main9_logical(i32 
%argc) { +; CHECK-LABEL: @main9_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 192 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %and = and i32 %argc, 64 + %tobool = icmp ne i32 %and, 0 + %trunc2 = trunc i32 %argc to i8 + %tobool3 = icmp slt i8 %trunc2, 0 + %or.cond = select i1 %tobool, i1 %tobool3, i1 false + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} + define i32 @main10(i32 %argc) { ; CHECK-LABEL: @main10( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 @@ -566,11 +1093,27 @@ define i32 @main10(i32 %argc) { ret i32 %retval.0 } +define i32 @main10_logical(i32 %argc) { +; CHECK-LABEL: @main10_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %and = and i32 %argc, 64 + %tobool = icmp eq i32 %and, 0 + %trunc2 = trunc i32 %argc to i8 + %tobool3 = icmp sge i8 %trunc2, 0 + %or.cond = select i1 %tobool, i1 %tobool3, i1 false + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} + define i32 @main11(i32 %argc) { ; CHECK-LABEL: @main11( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 192 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 192 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -582,11 +1125,27 @@ define i32 @main11(i32 %argc) { ret i32 %retval.0 } +define i32 @main11_logical(i32 %argc) { +; CHECK-LABEL: @main11_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 192 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 
+; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %and = and i32 %argc, 64 + %tobool = icmp eq i32 %and, 0 + %trunc2 = trunc i32 %argc to i8 + %tobool3 = icmp sge i8 %trunc2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} + define i32 @main12(i32 %argc) { ; CHECK-LABEL: @main12( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -598,6 +1157,22 @@ define i32 @main12(i32 %argc) { ret i32 %retval.0 } +define i32 @main12_logical(i32 %argc) { +; CHECK-LABEL: @main12_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %trunc = trunc i32 %argc to i16 + %tobool = icmp slt i16 %trunc, 0 + %trunc2 = trunc i32 %argc to i8 + %tobool3 = icmp slt i8 %trunc2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} + define i32 @main13(i32 %argc) { ; CHECK-LABEL: @main13( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 @@ -614,6 +1189,22 @@ define i32 @main13(i32 %argc) { ret i32 %retval.0 } +define i32 @main13_logical(i32 %argc) { +; CHECK-LABEL: @main13_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 32896 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %trunc = trunc i32 %argc to i16 + %tobool = icmp slt i16 %trunc, 0 + %trunc2 = trunc i32 %argc to i8 + %tobool3 = icmp slt i8 %trunc2, 0 + %or.cond = select i1 
%tobool, i1 %tobool3, i1 false + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} + define i32 @main14(i32 %argc) { ; CHECK-LABEL: @main14( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 @@ -630,11 +1221,27 @@ define i32 @main14(i32 %argc) { ret i32 %retval.0 } +define i32 @main14_logical(i32 %argc) { +; CHECK-LABEL: @main14_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %trunc = trunc i32 %argc to i16 + %tobool = icmp sge i16 %trunc, 0 + %trunc2 = trunc i32 %argc to i8 + %tobool3 = icmp sge i8 %trunc2, 0 + %or.cond = select i1 %tobool, i1 %tobool3, i1 false + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} + define i32 @main15(i32 %argc) { ; CHECK-LABEL: @main15( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 32896 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 32896 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -645,3 +1252,19 @@ define i32 @main15(i32 %argc) { %retval.0 = select i1 %or.cond, i32 2, i32 1 ret i32 %retval.0 } + +define i32 @main15_logical(i32 %argc) { +; CHECK-LABEL: @main15_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 32896 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %trunc = trunc i32 %argc to i16 + %tobool = icmp sge i16 %trunc, 0 + %trunc2 = trunc i32 %argc to i8 + %tobool3 = icmp sge i8 %trunc2, 0 + %or.cond = select i1 %tobool, i1 true, i1 %tobool3 + %retval.0 = select i1 %or.cond, i32 2, i32 1 + ret i32 %retval.0 +} diff --git 
a/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll index 0156c9071a64a9..593e50abdb120e 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll @@ -20,6 +20,22 @@ define i32 @t0_select_cond_and_v0(i32 %X) { %R = select i1 %dont_need_to_clamp, i32 %X, i32 %clamp_limit ret i32 %R } + +define i32 @t0_select_cond_and_v0_logical(i32 %X) { +; CHECK-LABEL: @t0_select_cond_and_v0_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 +; CHECK-NEXT: ret i32 [[R]] +; + %dont_need_to_clamp_positive = icmp sle i32 %X, 32767 + %dont_need_to_clamp_negative = icmp sge i32 %X, -32768 + %clamp_limit = select i1 %dont_need_to_clamp_positive, i32 -32768, i32 32767 + %dont_need_to_clamp = select i1 %dont_need_to_clamp_positive, i1 %dont_need_to_clamp_negative, i1 false + %R = select i1 %dont_need_to_clamp, i32 %X, i32 %clamp_limit + ret i32 %R +} define i32 @t1_select_cond_and_v1(i32 %X) { ; CHECK-LABEL: @t1_select_cond_and_v1( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 @@ -36,6 +52,22 @@ define i32 @t1_select_cond_and_v1(i32 %X) { ret i32 %R } +define i32 @t1_select_cond_and_v1_logical(i32 %X) { +; CHECK-LABEL: @t1_select_cond_and_v1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 +; CHECK-NEXT: ret i32 [[R]] +; + 
%dont_need_to_clamp_positive = icmp sle i32 %X, 32767 + %dont_need_to_clamp_negative = icmp sge i32 %X, -32768 + %clamp_limit = select i1 %dont_need_to_clamp_negative, i32 32767, i32 -32768 + %dont_need_to_clamp = select i1 %dont_need_to_clamp_positive, i1 %dont_need_to_clamp_negative, i1 false + %R = select i1 %dont_need_to_clamp, i32 %X, i32 %clamp_limit + ret i32 %R +} + ;------------------------------------------------------------------------------- define i32 @t2_select_cond_or_v0(i32 %X) { @@ -53,6 +85,22 @@ define i32 @t2_select_cond_or_v0(i32 %X) { %R = select i1 %need_to_clamp, i32 %clamp_limit, i32 %X ret i32 %R } + +define i32 @t2_select_cond_or_v0_logical(i32 %X) { +; CHECK-LABEL: @t2_select_cond_or_v0_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 +; CHECK-NEXT: ret i32 [[R]] +; + %need_to_clamp_positive = icmp sgt i32 %X, 32767 + %need_to_clamp_negative = icmp slt i32 %X, -32768 + %clamp_limit = select i1 %need_to_clamp_positive, i32 32767, i32 -32768 + %need_to_clamp = select i1 %need_to_clamp_positive, i1 true, i1 %need_to_clamp_negative + %R = select i1 %need_to_clamp, i32 %clamp_limit, i32 %X + ret i32 %R +} define i32 @t3_select_cond_or_v1(i32 %X) { ; CHECK-LABEL: @t3_select_cond_or_v1( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 @@ -69,12 +117,28 @@ define i32 @t3_select_cond_or_v1(i32 %X) { ret i32 %R } +define i32 @t3_select_cond_or_v1_logical(i32 %X) { +; CHECK-LABEL: @t3_select_cond_or_v1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 +; CHECK-NEXT: ret i32 [[R]] +; + 
%need_to_clamp_positive = icmp sgt i32 %X, 32767 + %need_to_clamp_negative = icmp slt i32 %X, -32768 + %clamp_limit = select i1 %need_to_clamp_negative, i32 -32768, i32 32767 + %need_to_clamp = select i1 %need_to_clamp_positive, i1 true, i1 %need_to_clamp_negative + %R = select i1 %need_to_clamp, i32 %clamp_limit, i32 %X + ret i32 %R +} + ;------------------------------------------------------------------------------- define i32 @t4_select_cond_xor_v0(i32 %X) { ; CHECK-LABEL: @t4_select_cond_xor_v0( -; CHECK-NEXT: [[DOTINV1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV1]], i32 [[X]], i32 -32768 +; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -32768 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i32 [[X]], i32 -32768 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 32767 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 32767 ; CHECK-NEXT: ret i32 [[R]] @@ -88,8 +152,8 @@ define i32 @t4_select_cond_xor_v0(i32 %X) { } define i32 @t4_select_cond_xor_v1(i32 %X) { ; CHECK-LABEL: @t4_select_cond_xor_v1( -; CHECK-NEXT: [[DOTINV1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV1]], i32 [[X]], i32 -32768 +; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -32768 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i32 [[X]], i32 -32768 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 32767 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 32767 ; CHECK-NEXT: ret i32 [[R]] @@ -104,8 +168,8 @@ define i32 @t4_select_cond_xor_v1(i32 %X) { define i32 @t5_select_cond_xor_v2(i32 %X) { ; CHECK-LABEL: @t5_select_cond_xor_v2( -; CHECK-NEXT: [[DOTINV1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV1]], i32 [[X]], i32 -32768 +; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -32768 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i32 [[X]], i32 -32768 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 32767 ; 
CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 32767 ; CHECK-NEXT: ret i32 [[R]] @@ -119,8 +183,8 @@ define i32 @t5_select_cond_xor_v2(i32 %X) { } define i32 @t5_select_cond_xor_v3(i32 %X) { ; CHECK-LABEL: @t5_select_cond_xor_v3( -; CHECK-NEXT: [[DOTINV1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV1]], i32 [[X]], i32 -32768 +; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -32768 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i32 [[X]], i32 -32768 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 32767 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 32767 ; CHECK-NEXT: ret i32 [[R]] diff --git a/llvm/test/Transforms/InstCombine/demorgan.ll b/llvm/test/Transforms/InstCombine/demorgan.ll index 465621a24a54c4..809c43d1a09dfb 100644 --- a/llvm/test/Transforms/InstCombine/demorgan.ll +++ b/llvm/test/Transforms/InstCombine/demorgan.ll @@ -471,6 +471,22 @@ define i32 @PR28476(i32 %x, i32 %y) { ret i32 %cond } +define i32 @PR28476_logical(i32 %x, i32 %y) { +; CHECK-LABEL: @PR28476_logical( +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: [[COND:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[COND]] +; + %cmp0 = icmp ne i32 %x, 0 + %cmp1 = icmp ne i32 %y, 0 + %and = select i1 %cmp0, i1 %cmp1, i1 false + %zext = zext i1 %and to i32 + %cond = xor i32 %zext, 1 + ret i32 %cond +} + ; ~(~(a | b) | (a & b)) --> (a | b) & ~(a & b) -> a ^ b define i32 @demorgan_plus_and_to_xor(i32 %a, i32 %b) { diff --git a/llvm/test/Transforms/InstCombine/dont-distribute-phi.ll b/llvm/test/Transforms/InstCombine/dont-distribute-phi.ll index bc7ddacbed16c9..98d91c9b048f51 100644 --- a/llvm/test/Transforms/InstCombine/dont-distribute-phi.ll +++ b/llvm/test/Transforms/InstCombine/dont-distribute-phi.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s ; ; This test ensures that InstCombine does not distribute And over Xor @@ -5,6 +6,21 @@ define zeroext i1 @foo(i32 %arg) { ; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[ARG:%.*]], 37 +; CHECK-NEXT: br i1 [[CMP1]], label [[BB_THEN:%.*]], label [[BB_ELSE:%.*]] +; CHECK: bb_then: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[BB_EXIT:%.*]] +; CHECK: bb_else: +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[ARG]], 17 +; CHECK-NEXT: br label [[BB_EXIT]] +; CHECK: bb_exit: +; CHECK-NEXT: [[PHI1:%.*]] = phi i1 [ [[CMP2]], [[BB_ELSE]] ], [ undef, [[BB_THEN]] ] +; CHECK-NEXT: [[XOR1:%.*]] = xor i1 [[CMP1]], true +; CHECK-NEXT: [[AND1:%.*]] = and i1 [[PHI1]], [[XOR1]] +; CHECK-NEXT: ret i1 [[AND1]] +; entry: %cmp1 = icmp eq i32 %arg, 37 @@ -18,15 +34,47 @@ bb_else: %cmp2 = icmp slt i32 %arg, 17 br label %bb_exit +bb_exit: + %phi1 = phi i1 [ %cmp2, %bb_else ], [ undef, %bb_then ] + %xor1 = xor i1 %cmp1, true + %and1 = and i1 %phi1, %xor1 + ret i1 %and1 +} + +define zeroext i1 @foo_logical(i32 %arg) { +; CHECK-LABEL: @foo_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[ARG:%.*]], 37 +; CHECK-NEXT: br i1 [[CMP1]], label [[BB_THEN:%.*]], label [[BB_ELSE:%.*]] +; CHECK: bb_then: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[BB_EXIT:%.*]] +; CHECK: bb_else: +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[ARG]], 17 +; CHECK-NEXT: br label [[BB_EXIT]] ; CHECK: bb_exit: -; CHECK-NEXT: [[PHI1:%.*]] = phi i1 [ [[CMP2:%.*]], [[BB_ELSE:%.*]] ], [ undef, [[BB_THEN:%.*]] ] -; CHECK-NEXT: [[XOR1:%.*]] = xor i1 [[CMP1:%.*]], true +; CHECK-NEXT: [[PHI1:%.*]] = phi i1 [ [[CMP2]], [[BB_ELSE]] ], [ undef, [[BB_THEN]] ] +; CHECK-NEXT: [[XOR1:%.*]] = xor i1 [[CMP1]], true ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[PHI1]], [[XOR1]] ; CHECK-NEXT: ret i1 [[AND1]] +; + +entry: + %cmp1 = icmp eq i32 %arg, 37 + br i1 %cmp1, label %bb_then, label 
%bb_else + +bb_then: + call void @bar() + br label %bb_exit + +bb_else: + %cmp2 = icmp slt i32 %arg, 17 + br label %bb_exit + bb_exit: %phi1 = phi i1 [ %cmp2, %bb_else ], [ undef, %bb_then ] %xor1 = xor i1 %cmp1, true - %and1 = and i1 %phi1, %xor1 + %and1 = select i1 %phi1, i1 %xor1, i1 false ret i1 %and1 } diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll index fc0c13a5f1a7a2..db3e7f3afb967f 100644 --- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll +++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll @@ -10,6 +10,14 @@ define i1 @f(i1 %x) { ret i1 %b } +define i1 @f_logical(i1 %x) { +; CHECK-LABEL: @f_logical( +; CHECK-NEXT: ret i1 false +; + %b = select i1 %x, i1 icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*)), i1 false + ret i1 %b +} + define i32 @g(i32 %x) { ; CHECK-LABEL: @g( ; CHECK-NEXT: ret i32 [[X:%.*]] diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll index 1a1ec2e2024d10..2546ec387003f2 100644 --- a/llvm/test/Transforms/InstCombine/freeze.ll +++ b/llvm/test/Transforms/InstCombine/freeze.ll @@ -74,3 +74,15 @@ define void @or_select_multipleuses(i32 %x, i1 %y) { call void @use_i32_i1(i32 %a, i1 %b) ret void } + +define void @or_select_multipleuses_logical(i32 %x, i1 %y) { +; CHECK-LABEL: @or_select_multipleuses_logical( +; CHECK-NEXT: call void @use_i32_i1(i32 32, i1 [[Y:%.*]]) +; CHECK-NEXT: ret void +; + %f = freeze i1 undef + %a = select i1 %f, i32 %x, i32 32 ; prefers %f to be false + %b = select i1 %f, i1 true, i1 %y ; prefers %f to be true + call void @use_i32_i1(i32 %a, i1 %b) + ret void +} diff --git a/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll b/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll index 09a3b2b5ff4f7c..6e76525bad3503 100644 --- a/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll +++ b/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll @@ -199,6 +199,24 @@ define i1 
@icmp_and_ashr_multiuse(i32 %X) { ret i1 %and3 } +define i1 @icmp_and_ashr_multiuse_logical(i32 %X) { +; CHECK-LABEL: @icmp_and_ashr_multiuse_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 240 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 224 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], 496 +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[TMP2]], 432 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]] +; CHECK-NEXT: ret i1 [[AND3]] +; + %shr = ashr i32 %X, 4 + %and = and i32 %shr, 15 + %and2 = and i32 %shr, 31 ; second use of the shift + %tobool = icmp ne i32 %and, 14 + %tobool2 = icmp ne i32 %and2, 27 + %and3 = select i1 %tobool, i1 %tobool2, i1 false + ret i1 %and3 +} + define i1 @icmp_lshr_and_overshift(i8 %X) { ; CHECK-LABEL: @icmp_lshr_and_overshift( ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ugt i8 [[X:%.*]], 31 diff --git a/llvm/test/Transforms/InstCombine/icmp-logical.ll b/llvm/test/Transforms/InstCombine/icmp-logical.ll index b5327df5e8e3d9..cc23b114bd01b9 100644 --- a/llvm/test/Transforms/InstCombine/icmp-logical.ll +++ b/llvm/test/Transforms/InstCombine/icmp-logical.ll @@ -15,6 +15,20 @@ define i1 @masked_and_notallzeroes(i32 %A) { ret i1 %res } +define i1 @masked_and_notallzeroes_logical(i32 %A) { +; CHECK-LABEL: @masked_and_notallzeroes_logical( +; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[TST1:%.*]] = icmp ne i32 [[MASK1]], 0 +; CHECK-NEXT: ret i1 [[TST1]] +; + %mask1 = and i32 %A, 7 + %tst1 = icmp ne i32 %mask1, 0 + %mask2 = and i32 %A, 39 + %tst2 = icmp ne i32 %mask2, 0 + %res = select i1 %tst1, i1 %tst2, i1 false + ret i1 %res +} + define i1 @masked_or_allzeroes(i32 %A) { ; CHECK-LABEL: @masked_or_allzeroes( ; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7 @@ -29,6 +43,20 @@ define i1 @masked_or_allzeroes(i32 %A) { ret i1 %res } +define i1 @masked_or_allzeroes_logical(i32 %A) { +; CHECK-LABEL: @masked_or_allzeroes_logical( +; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[TST1:%.*]] = 
icmp eq i32 [[MASK1]], 0 +; CHECK-NEXT: ret i1 [[TST1]] +; + %mask1 = and i32 %A, 7 + %tst1 = icmp eq i32 %mask1, 0 + %mask2 = and i32 %A, 39 + %tst2 = icmp eq i32 %mask2, 0 + %res = select i1 %tst1, i1 true, i1 %tst2 + ret i1 %res +} + define i1 @masked_and_notallones(i32 %A) { ; CHECK-LABEL: @masked_and_notallones( ; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7 @@ -43,6 +71,20 @@ define i1 @masked_and_notallones(i32 %A) { ret i1 %res } +define i1 @masked_and_notallones_logical(i32 %A) { +; CHECK-LABEL: @masked_and_notallones_logical( +; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[TST1:%.*]] = icmp ne i32 [[MASK1]], 7 +; CHECK-NEXT: ret i1 [[TST1]] +; + %mask1 = and i32 %A, 7 + %tst1 = icmp ne i32 %mask1, 7 + %mask2 = and i32 %A, 39 + %tst2 = icmp ne i32 %mask2, 39 + %res = select i1 %tst1, i1 %tst2, i1 false + ret i1 %res +} + define i1 @masked_or_allones(i32 %A) { ; CHECK-LABEL: @masked_or_allones( ; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7 @@ -57,6 +99,20 @@ define i1 @masked_or_allones(i32 %A) { ret i1 %res } +define i1 @masked_or_allones_logical(i32 %A) { +; CHECK-LABEL: @masked_or_allones_logical( +; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASK1]], 7 +; CHECK-NEXT: ret i1 [[TST1]] +; + %mask1 = and i32 %A, 7 + %tst1 = icmp eq i32 %mask1, 7 + %mask2 = and i32 %A, 39 + %tst2 = icmp eq i32 %mask2, 39 + %res = select i1 %tst1, i1 true, i1 %tst2 + ret i1 %res +} + define i1 @masked_and_notA(i32 %A) { ; CHECK-LABEL: @masked_and_notA( ; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A:%.*]], 78 @@ -71,6 +127,20 @@ define i1 @masked_and_notA(i32 %A) { ret i1 %res } +define i1 @masked_and_notA_logical(i32 %A) { +; CHECK-LABEL: @masked_and_notA_logical( +; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A:%.*]], 78 +; CHECK-NEXT: [[TST2:%.*]] = icmp ne i32 [[MASK2]], [[A]] +; CHECK-NEXT: ret i1 [[TST2]] +; + %mask1 = and i32 %A, 14 + %tst1 = icmp ne i32 %mask1, %A + %mask2 = and i32 %A, 78 + %tst2 = 
icmp ne i32 %mask2, %A + %res = select i1 %tst1, i1 %tst2, i1 false + ret i1 %res +} + define i1 @masked_and_notA_slightly_optimized(i32 %A) { ; CHECK-LABEL: @masked_and_notA_slightly_optimized( ; CHECK-NEXT: [[T0:%.*]] = icmp ugt i32 [[A:%.*]], 7 @@ -86,6 +156,21 @@ define i1 @masked_and_notA_slightly_optimized(i32 %A) { ret i1 %res } +define i1 @masked_and_notA_slightly_optimized_logical(i32 %A) { +; CHECK-LABEL: @masked_and_notA_slightly_optimized_logical( +; CHECK-NEXT: [[T0:%.*]] = icmp ugt i32 [[A:%.*]], 7 +; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A]], 39 +; CHECK-NEXT: [[TST2:%.*]] = icmp ne i32 [[MASK2]], [[A]] +; CHECK-NEXT: [[RES:%.*]] = and i1 [[T0]], [[TST2]] +; CHECK-NEXT: ret i1 [[RES]] +; + %t0 = icmp uge i32 %A, 8 + %mask2 = and i32 %A, 39 + %tst2 = icmp ne i32 %mask2, %A + %res = select i1 %t0, i1 %tst2, i1 false + ret i1 %res +} + define i1 @masked_or_A(i32 %A) { ; CHECK-LABEL: @masked_or_A( ; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A:%.*]], 78 @@ -100,6 +185,20 @@ define i1 @masked_or_A(i32 %A) { ret i1 %res } +define i1 @masked_or_A_logical(i32 %A) { +; CHECK-LABEL: @masked_or_A_logical( +; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A:%.*]], 78 +; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASK2]], [[A]] +; CHECK-NEXT: ret i1 [[TST2]] +; + %mask1 = and i32 %A, 14 + %tst1 = icmp eq i32 %mask1, %A + %mask2 = and i32 %A, 78 + %tst2 = icmp eq i32 %mask2, %A + %res = select i1 %tst1, i1 true, i1 %tst2 + ret i1 %res +} + define i1 @masked_or_A_slightly_optimized(i32 %A) { ; CHECK-LABEL: @masked_or_A_slightly_optimized( ; CHECK-NEXT: [[T0:%.*]] = icmp ult i32 [[A:%.*]], 8 @@ -115,6 +214,21 @@ define i1 @masked_or_A_slightly_optimized(i32 %A) { ret i1 %res } +define i1 @masked_or_A_slightly_optimized_logical(i32 %A) { +; CHECK-LABEL: @masked_or_A_slightly_optimized_logical( +; CHECK-NEXT: [[T0:%.*]] = icmp ult i32 [[A:%.*]], 8 +; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A]], 39 +; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASK2]], [[A]] +; CHECK-NEXT: [[RES:%.*]] = 
or i1 [[T0]], [[TST2]] +; CHECK-NEXT: ret i1 [[RES]] +; + %t0 = icmp ult i32 %A, 8 + %mask2 = and i32 %A, 39 + %tst2 = icmp eq i32 %mask2, %A + %res = select i1 %t0, i1 true, i1 %tst2 + ret i1 %res +} + define i1 @masked_or_allzeroes_notoptimised(i32 %A) { ; CHECK-LABEL: @masked_or_allzeroes_notoptimised( ; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 15 @@ -132,6 +246,23 @@ define i1 @masked_or_allzeroes_notoptimised(i32 %A) { ret i1 %res } +define i1 @masked_or_allzeroes_notoptimised_logical(i32 %A) { +; CHECK-LABEL: @masked_or_allzeroes_notoptimised_logical( +; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 15 +; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASK1]], 0 +; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A]], 39 +; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASK2]], 0 +; CHECK-NEXT: [[RES:%.*]] = or i1 [[TST1]], [[TST2]] +; CHECK-NEXT: ret i1 [[RES]] +; + %mask1 = and i32 %A, 15 + %tst1 = icmp eq i32 %mask1, 0 + %mask2 = and i32 %A, 39 + %tst2 = icmp eq i32 %mask2, 0 + %res = select i1 %tst1, i1 true, i1 %tst2 + ret i1 %res +} + define i1 @nomask_lhs(i32 %in) { ; CHECK-LABEL: @nomask_lhs( ; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[IN:%.*]], 1 @@ -145,6 +276,19 @@ define i1 @nomask_lhs(i32 %in) { ret i1 %val } +define i1 @nomask_lhs_logical(i32 %in) { +; CHECK-LABEL: @nomask_lhs_logical( +; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[IN:%.*]], 1 +; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASKED]], 0 +; CHECK-NEXT: ret i1 [[TST2]] +; + %tst1 = icmp eq i32 %in, 0 + %masked = and i32 %in, 1 + %tst2 = icmp eq i32 %masked, 0 + %val = select i1 %tst1, i1 true, i1 %tst2 + ret i1 %val +} + define i1 @nomask_rhs(i32 %in) { ; CHECK-LABEL: @nomask_rhs( ; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[IN:%.*]], 1 @@ -158,6 +302,19 @@ define i1 @nomask_rhs(i32 %in) { ret i1 %val } +define i1 @nomask_rhs_logical(i32 %in) { +; CHECK-LABEL: @nomask_rhs_logical( +; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[IN:%.*]], 1 +; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASKED]], 0 +; CHECK-NEXT: ret i1 
[[TST1]] +; + %masked = and i32 %in, 1 + %tst1 = icmp eq i32 %masked, 0 + %tst2 = icmp eq i32 %in, 0 + %val = select i1 %tst1, i1 true, i1 %tst2 + ret i1 %val +} + ; TODO: This test simplifies to a constant, so the functionality and test could be in InstSimplify. define i1 @fold_mask_cmps_to_false(i32 %x) { @@ -171,6 +328,17 @@ define i1 @fold_mask_cmps_to_false(i32 %x) { ret i1 %t4 } +define i1 @fold_mask_cmps_to_false_logical(i32 %x) { +; CHECK-LABEL: @fold_mask_cmps_to_false_logical( +; CHECK-NEXT: ret i1 false +; + %t1 = and i32 %x, 2147483647 + %t2 = icmp eq i32 %t1, 0 + %t3 = icmp eq i32 %x, 2147483647 + %t4 = select i1 %t3, i1 %t2, i1 false + ret i1 %t4 +} + ; TODO: This test simplifies to a constant, so the functionality and test could be in InstSimplify. define i1 @fold_mask_cmps_to_true(i32 %x) { @@ -184,6 +352,17 @@ define i1 @fold_mask_cmps_to_true(i32 %x) { ret i1 %t4 } +define i1 @fold_mask_cmps_to_true_logical(i32 %x) { +; CHECK-LABEL: @fold_mask_cmps_to_true_logical( +; CHECK-NEXT: ret i1 true +; + %t1 = and i32 %x, 2147483647 + %t2 = icmp ne i32 %t1, 0 + %t3 = icmp ne i32 %x, 2147483647 + %t4 = select i1 %t3, i1 true, i1 %t2 + ret i1 %t4 +} + ; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401 define i1 @cmpeq_bitwise(i8 %a, i8 %b, i8 %c, i8 %d) { @@ -232,6 +411,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_0(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_0_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_0_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 12 +; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 1 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 12 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp eq i32 %t3, 1 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 12) != 0 & (X & 7) == 1) 
-> (X & 15) == 9 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1( @@ -247,6 +443,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 %x, 12 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp eq i32 %t3, 1 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 14) != 0 & (X & 3) == 1) -> no change define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1b(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1b( @@ -265,6 +475,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1b(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1b_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 14 +; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 1 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 14 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp eq i32 %t3, 1 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 3) != 0 & (X & 7) == 0) -> false define i1 @masked_icmps_mask_notallzeros_bmask_mixed_2(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_2( @@ -278,6 +505,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_2(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_2_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_2_logical( +; CHECK-NEXT: ret i1 false +; + %t1 = and i32 %x, 3 
+ %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp eq i32 %t3, 0 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 15) != 0 & (X & 7) == 0) -> (X & 15) == 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_3(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_3( @@ -293,6 +532,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_3(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_3_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 8 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 %x, 15 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp eq i32 %t3, 0 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 15) != 0 & (X & 3) == 0) -> no change define i1 @masked_icmps_mask_notallzeros_bmask_mixed_3b(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_3b( @@ -311,6 +564,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_3b(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_3b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_3b_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 0 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 15 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp eq i32 %t3, 0 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 255) != 0 & (X & 15) == 8) -> (X & 15) == 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_4(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_4( @@ -326,6 +596,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_4(i32 %x) { ret i1 %t5 } +define i1 
@masked_icmps_mask_notallzeros_bmask_mixed_4_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_4_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 255 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 15) != 0 & (X & 15) == 8) -> (X & 15) == 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_5(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_5( @@ -341,6 +625,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_5(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_5_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_5_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 15 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 12) != 0 & (X & 15) == 8) -> (X & 15) == 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_6(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_6( @@ -356,6 +654,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_6(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_6_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_6_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 12 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 7) != 0 & (X & 15) == 8) -> false define i1 @masked_icmps_mask_notallzeros_bmask_mixed_7(i32 %x) { ; CHECK-LABEL: 
@masked_icmps_mask_notallzeros_bmask_mixed_7( @@ -369,6 +681,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_7(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_7_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_7_logical( +; CHECK-NEXT: ret i1 false +; + %t1 = and i32 %x, 7 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + ; ((X & 6) != 0 & (X & 15) == 8) -> false define i1 @masked_icmps_mask_notallzeros_bmask_mixed_7b(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_7b( @@ -382,7 +706,19 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_7b(i32 %x) { ret i1 %t5 } -; ((X & 12) == 0 | (X & 3) != 1) -> !((X & 12) != 0 & (X & 3) == 1)) -> +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_7b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_7b_logical( +; CHECK-NEXT: ret i1 false +; + %t1 = and i32 %x, 6 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + +; ((X & 12) == 0 | (X & 3) != 1) -> !((X & 12) != 0 & (X & 3) == 1)) -> ; no change define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_0(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_0( @@ -401,6 +737,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_0(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_0_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_0_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 12 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 1 +; CHECK-NEXT: [[T5:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 12 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 3 + 
%t4 = icmp ne i32 %t3, 1 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 12) == 0 | (X & 7) != 1) -> !((X & 12) != 0 & (X & 7) == 1) -> ; !((X & 15) == 9) -> (X & 15) != 9 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_1(i32 %x) { @@ -417,6 +770,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_1(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_1_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 9 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 %x, 12 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp ne i32 %t3, 1 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 14) == 0 | (X & 3) != 1) -> !((X & 14) != 0 & (X & 3) == 1) -> ; no change. define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_1b(i32 %x) { @@ -436,6 +803,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_1b(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_1b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_1b_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 14 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 1 +; CHECK-NEXT: [[T5:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 14 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp ne i32 %t3, 1 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 3) == 0 | (X & 7) != 0) -> !((X & 3) != 0 & (X & 7) == 0) -> ; !(false) -> true define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_2(i32 %x) { @@ -450,6 +834,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_2(i32 %x) { ret i1 %t5 } +define i1 
@masked_icmps_mask_notallzeros_bmask_mixed_negated_2_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_2_logical( +; CHECK-NEXT: ret i1 true +; + %t1 = and i32 %x, 3 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 15) == 0 | (X & 7) != 0) -> !((X & 15) != 0 & (X & 7) == 0) -> ; !((X & 15) == 8) -> (X & 15) != 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_3(i32 %x) { @@ -466,6 +862,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_3(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_3_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 8 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 %x, 15 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 15) == 0 | (X & 3) != 0) -> !((X & 15) != 0 & (X & 3) == 0) -> ; no change. 
define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_3b(i32 %x) { @@ -485,6 +895,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_3b(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_3b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_3b_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 0 +; CHECK-NEXT: [[T5:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 15 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 255) == 0 | (X & 15) != 8) -> !(((X & 255) != 0 & (X & 15) == 8)) -> ; !((X & 15) == 8) -> ((X & 15) != 8) define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_4(i32 %x) { @@ -501,6 +928,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_4(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_4_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_4_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 255 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 15) == 0 | (X & 15) != 8) -> !(((X & 15) != 0 & (X & 15) == 8)) -> ; !((X & 15) == 8) -> ((X & 15) != 8) define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_5(i32 %x) { @@ -517,6 +958,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_5(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_5_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_5_logical( +; CHECK-NEXT: [[T3:%.*]] 
= and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 15 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 12) == 0 | (X & 15) != 8) -> !(((X & 12) != 0 & (X & 15) == 8)) -> ; !((X & 15) == 8) -> ((X & 15) != 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_6(i32 %x) { @@ -533,6 +988,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_6(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_6_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_6_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 12 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 7) == 0 | (X & 15) != 8) -> !(((X & 7) != 0 & (X & 15) == 8)) -> ; !(false) -> true define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_7(i32 %x) { @@ -547,6 +1016,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_7(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_7_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_7_logical( +; CHECK-NEXT: ret i1 true +; + %t1 = and i32 %x, 7 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 6) == 0 | (X & 15) != 8) -> !(((X & 6) != 0 & (X & 15) == 8)) -> ; !(false) -> true define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_7b(i32 %x) { @@ -561,6 +1042,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_7b(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_7b_logical(i32 %x) { +; CHECK-LABEL: 
@masked_icmps_mask_notallzeros_bmask_mixed_negated_7b_logical( +; CHECK-NEXT: ret i1 true +; + %t1 = and i32 %x, 6 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t2, i1 true, i1 %t4 + ret i1 %t5 +} + ; ((X & 12) != 0 & (X & 3) == 1) -> no change define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_0(i32 %x) { @@ -580,6 +1073,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_0(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_0_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_0_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 12 +; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 1 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T4]], [[T2]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 12 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp eq i32 %t3, 1 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 12) != 0 & (X & 7) == 1) -> (X & 15) == 9 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1( @@ -595,6 +1105,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 %x, 12 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp eq i32 %t3, 1 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 14) != 0 & (X & 3) == 1) -> no change define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1b(i32 %x) { ; CHECK-LABEL: 
@masked_icmps_mask_notallzeros_bmask_mixed_swapped_1b( @@ -613,6 +1137,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1b(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1b_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 14 +; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 1 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T4]], [[T2]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 14 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp eq i32 %t3, 1 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 3) != 0 & (X & 7) == 0) -> false define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_2(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_2( @@ -626,6 +1167,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_2(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_2_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_2_logical( +; CHECK-NEXT: ret i1 false +; + %t1 = and i32 %x, 3 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp eq i32 %t3, 0 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 15) != 0 & (X & 7) == 0) -> (X & 15) == 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3( @@ -641,6 +1194,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 8 +; CHECK-NEXT: ret i1 [[TMP2]] +; + 
%t1 = and i32 %x, 15 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp eq i32 %t3, 0 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 15) != 0 & (X & 3) == 0) -> no change define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3b(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3b( @@ -659,6 +1226,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3b(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3b_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 0 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T4]], [[T2]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 15 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp eq i32 %t3, 0 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 255) != 0 & (X & 15) == 8) -> (X & 15) == 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_4(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_4( @@ -674,6 +1258,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_4(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_4_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_4_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 255 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 15) != 0 & (X & 15) == 8) -> (X & 15) == 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_5(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_5( @@ 
-689,6 +1287,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_5(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_5_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_5_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 15 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 12) != 0 & (X & 15) == 8) -> (X & 15) == 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_6(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_6( @@ -704,6 +1316,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_6(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_6_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_6_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 12 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 7) != 0 & (X & 15) == 8) -> false define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7( @@ -717,6 +1343,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7_logical( +; CHECK-NEXT: ret i1 false +; + %t1 = and i32 %x, 7 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 6) != 0 & (X & 15) == 8) -> false define i1 
@masked_icmps_mask_notallzeros_bmask_mixed_swapped_7b(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7b( @@ -730,6 +1368,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7b(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7b_logical( +; CHECK-NEXT: ret i1 false +; + %t1 = and i32 %x, 6 + %t2 = icmp ne i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp eq i32 %t3, 8 + %t5 = select i1 %t4, i1 %t2, i1 false + ret i1 %t5 +} + ; ((X & 12) == 0 | (X & 3) != 1) -> !((X & 12) != 0 & (X & 3) == 1)) -> ; no change define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_0(i32 %x) { @@ -749,6 +1399,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_0(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_0_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_0_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 12 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 1 +; CHECK-NEXT: [[T5:%.*]] = or i1 [[T4]], [[T2]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 12 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp ne i32 %t3, 1 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 12) == 0 | (X & 7) != 1) -> !((X & 12) != 0 & (X & 7) == 1) -> ; !((X & 15) == 9) -> (X & 15) != 9 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1(i32 %x) { @@ -765,6 +1432,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 
[[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 9 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 %x, 12 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp ne i32 %t3, 1 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 14) == 0 | (X & 3) != 1) -> !((X & 14) != 0 & (X & 3) == 1) -> ; no change. define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1b(i32 %x) { @@ -784,6 +1465,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1b(i32 %x) ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1b_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 14 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 1 +; CHECK-NEXT: [[T5:%.*]] = or i1 [[T4]], [[T2]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 14 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp ne i32 %t3, 1 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 3) == 0 | (X & 7) != 0) -> !((X & 3) != 0 & (X & 7) == 0) -> ; !(false) -> true define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_2(i32 %x) { @@ -798,6 +1496,18 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_2(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_2_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_2_logical( +; CHECK-NEXT: ret i1 true +; + %t1 = and i32 %x, 3 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 15) == 0 | (X & 7) != 0) -> !((X & 15) != 0 & (X & 7) == 0) -> ; !((X & 15) == 8) -> (X & 15) != 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3(i32 %x) { @@ -814,6 
+1524,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 8 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 %x, 15 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 7 + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 15) == 0 | (X & 3) != 0) -> !((X & 15) != 0 & (X & 3) == 0) -> ; no change. define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3b(i32 %x) { @@ -833,6 +1557,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3b(i32 %x) ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3b_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 0 +; CHECK-NEXT: [[T5:%.*]] = or i1 [[T4]], [[T2]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %x, 15 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 3 + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 255) == 0 | (X & 15) != 8) -> !(((X & 255) != 0 & (X & 15) == 8)) -> ; !((X & 15) == 8) -> ((X & 15) != 8) define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_4(i32 %x) { @@ -849,6 +1590,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_4(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_4_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_4_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 
[[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 255 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 15) == 0 | (X & 15) != 8) -> !(((X & 15) != 0 & (X & 15) == 8)) -> ; !((X & 15) == 8) -> ((X & 15) != 8) define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_5(i32 %x) { @@ -865,6 +1620,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_5(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_5_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_5_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 15 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 12) == 0 | (X & 15) != 8) -> !(((X & 12) != 0 & (X & 15) == 8)) -> ; !((X & 15) == 8) -> ((X & 15) != 8 define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_6(i32 %x) { @@ -881,6 +1650,20 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_6(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_6_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_6_logical( +; CHECK-NEXT: [[T3:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[T3]], 8 +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = and i32 %x, 12 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 7) == 0 | (X & 15) != 8) -> !(((X & 7) != 0 & (X & 15) == 8)) -> ; !(false) -> true define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7(i32 %x) { @@ -895,6 +1678,18 @@ 
define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7(i32 %x) { ret i1 %t5 } +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7_logical( +; CHECK-NEXT: ret i1 true +; + %t1 = and i32 %x, 7 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} + ; ((X & 6) == 0 | (X & 15) != 8) -> !(((X & 6) != 0 & (X & 15) == 8)) -> ; !(false) -> true define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7b(i32 %x) { @@ -908,3 +1703,15 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7b(i32 %x) %t5 = or i1 %t4, %t2 ret i1 %t5 } + +define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7b_logical(i32 %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7b_logical( +; CHECK-NEXT: ret i1 true +; + %t1 = and i32 %x, 6 + %t2 = icmp eq i32 %t1, 0 + %t3 = and i32 %x, 15 + %t4 = icmp ne i32 %t3, 8 + %t5 = select i1 %t4, i1 true, i1 %t2 + ret i1 %t5 +} diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index b21077e2e2efcb..b48466e678d8c6 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -970,6 +970,22 @@ define i1 @test52(i32 %x1) { ret i1 %A } +define i1 @test52_logical(i32 %x1) { +; CHECK-LABEL: @test52_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X1:%.*]], 16711935 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 4980863 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %conv = and i32 %x1, 255 + %cmp = icmp eq i32 %conv, 127 + %i2 = lshr i32 %x1, 16 + %i3 = trunc i32 %i2 to i8 + %cmp15 = icmp eq i8 %i3, 76 + + %A = select i1 %cmp, i1 %cmp15, i1 false + ret i1 %A +} + define i1 @test52b(i128 %x1) { ; CHECK-LABEL: @test52b( ; CHECK-NEXT: [[TMP1:%.*]] = and i128 [[X1:%.*]], 16711935 @@ -986,6 +1002,22 @@ define 
i1 @test52b(i128 %x1) { ret i1 %A } +define i1 @test52b_logical(i128 %x1) { +; CHECK-LABEL: @test52b_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i128 [[X1:%.*]], 16711935 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i128 [[TMP1]], 4980863 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %conv = and i128 %x1, 255 + %cmp = icmp eq i128 %conv, 127 + %i2 = lshr i128 %x1, 16 + %i3 = trunc i128 %i2 to i8 + %cmp15 = icmp eq i8 %i3, 76 + + %A = select i1 %cmp, i1 %cmp15, i1 false + ret i1 %A +} + ; PR9838 define i1 @test53(i32 %a, i32 %b) { ; CHECK-LABEL: @test53( @@ -1841,6 +1873,24 @@ define i1 @icmp_and_shr_multiuse(i32 %X) { ret i1 %and3 } +define i1 @icmp_and_shr_multiuse_logical(i32 %X) { +; CHECK-LABEL: @icmp_and_shr_multiuse_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 240 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 224 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], 496 +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[TMP2]], 432 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]] +; CHECK-NEXT: ret i1 [[AND3]] +; + %shr = lshr i32 %X, 4 + %and = and i32 %shr, 15 + %and2 = and i32 %shr, 31 ; second use of the shift + %tobool = icmp ne i32 %and, 14 + %tobool2 = icmp ne i32 %and2, 27 + %and3 = select i1 %tobool, i1 %tobool2, i1 false + ret i1 %and3 +} + ; Variation of the above with an ashr define i1 @icmp_and_ashr_multiuse(i32 %X) { ; CHECK-LABEL: @icmp_and_ashr_multiuse( @@ -1860,6 +1910,24 @@ define i1 @icmp_and_ashr_multiuse(i32 %X) { ret i1 %and3 } +define i1 @icmp_and_ashr_multiuse_logical(i32 %X) { +; CHECK-LABEL: @icmp_and_ashr_multiuse_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 240 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 224 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], 496 +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[TMP2]], 432 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]] +; CHECK-NEXT: ret i1 [[AND3]] +; + %shr = ashr i32 %X, 4 + %and = and i32 %shr, 15 + %and2 = and i32 %shr, 31 ; second use of the 
shift + %tobool = icmp ne i32 %and, 14 + %tobool2 = icmp ne i32 %and2, 27 + %and3 = select i1 %tobool, i1 %tobool2, i1 false + ret i1 %and3 +} + define i1 @icmp_lshr_and_overshift(i8 %X) { ; CHECK-LABEL: @icmp_lshr_and_overshift( ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ugt i8 [[X:%.*]], 31 @@ -2162,6 +2230,18 @@ define i1 @or_icmp_eq_B_0_icmp_ult_A_B(i64 %a, i64 %b) { ret i1 %3 } +define i1 @or_icmp_eq_B_0_icmp_ult_A_B_logical(i64 %a, i64 %b) { +; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_logical( +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[B:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i64 [[TMP1]], [[A:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = icmp eq i64 %b, 0 + %2 = icmp ult i64 %a, %b + %3 = select i1 %1, i1 true, i1 %2 + ret i1 %3 +} + define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_uniform(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_uniform( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], @@ -2198,6 +2278,18 @@ define i1 @or_icmp_ne_A_0_icmp_ne_B_0(i64 %a, i64 %b) { ret i1 %3 } +define i1 @or_icmp_ne_A_0_icmp_ne_B_0_logical(i64 %a, i64 %b) { +; CHECK-LABEL: @or_icmp_ne_A_0_icmp_ne_B_0_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = icmp ne i64 %a, 0 + %2 = icmp ne i64 %b, 0 + %3 = select i1 %1, i1 true, i1 %2 + ret i1 %3 +} + define <2 x i1> @or_icmp_ne_A_0_icmp_ne_B_0_uniform(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @or_icmp_ne_A_0_icmp_ne_B_0_uniform( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i64> [[A:%.*]], [[B:%.*]] diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll index f0d4a80ccfb04b..c54c6271ec07d5 100644 --- a/llvm/test/Transforms/InstCombine/ispow2.ll +++ b/llvm/test/Transforms/InstCombine/ispow2.ll @@ -3,7 +3,7 @@ define i1 @is_pow2or0_negate_op(i32 %x) { ; CHECK-LABEL: @is_pow2or0_negate_op( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 
[[X:%.*]]), !range !0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0:!range !.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -27,7 +27,7 @@ define <2 x i1> @is_pow2or0_negate_op_vec(<2 x i32> %x) { define i1 @is_pow2or0_decrement_op(i8 %x) { ; CHECK-LABEL: @is_pow2or0_decrement_op( -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range !1 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), [[RNG1:!range !.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -51,7 +51,7 @@ define <2 x i1> @is_pow2or0_decrement_op_vec(<2 x i8> %x) { define i1 @isnot_pow2or0_negate_op(i32 %x) { ; CHECK-LABEL: @isnot_pow2or0_negate_op( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -75,7 +75,7 @@ define <2 x i1> @isnot_pow2or0_negate_op_vec(<2 x i32> %x) { define i1 @isnot_pow2or0_decrement_op(i8 %x) { ; CHECK-LABEL: @isnot_pow2or0_decrement_op( -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range !1 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), [[RNG1]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -100,7 +100,7 @@ define <2 x i1> @isnot_pow2or0_decrement_op_vec(<2 x i8> %x) { define i1 @is_pow2or0_negate_op_commute1(i32 %p) { ; CHECK-LABEL: @is_pow2or0_negate_op_commute1( ; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[P:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range !2 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), [[RNG2:!range !.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -116,7 +116,7 @@ define i1 @is_pow2or0_negate_op_commute1(i32 %p) { define i1 
@isnot_pow2or0_negate_op_commute2(i32 %p) { ; CHECK-LABEL: @isnot_pow2or0_negate_op_commute2( ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[P:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range !3 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), [[RNG3:!range !.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -130,7 +130,7 @@ define i1 @isnot_pow2or0_negate_op_commute2(i32 %p) { define i1 @isnot_pow2or0_negate_op_commute3(i32 %p) { ; CHECK-LABEL: @isnot_pow2or0_negate_op_commute3( ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[P:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range !3 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), [[RNG3]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -147,7 +147,7 @@ define i1 @is_pow2or0_negate_op_extra_use1(i32 %x) { ; CHECK-LABEL: @is_pow2or0_negate_op_extra_use1( ; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[X:%.*]] ; CHECK-NEXT: call void @use(i32 [[NEG]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range !0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -180,7 +180,7 @@ declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>) define i1 @is_pow2_ctpop(i32 %x) { ; CHECK-LABEL: @is_pow2_ctpop( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[T0]], 1 ; CHECK-NEXT: ret i1 [[TMP1]] ; @@ -191,12 +191,25 @@ define i1 @is_pow2_ctpop(i32 %x) { ret i1 %r } +define i1 @is_pow2_ctpop_logical(i32 %x) { +; CHECK-LABEL: @is_pow2_ctpop_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[T0]], 1 +; CHECK-NEXT: ret i1 
[[TMP1]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ult i32 %t0, 2 + %notzero = icmp ne i32 %x, 0 + %r = select i1 %notzero, i1 %cmp, i1 false + ret i1 %r +} + ; Extra uses don't change the fold. declare void @use_i1(i1) define i1 @is_pow2_ctpop_extra_uses(i32 %x) { ; CHECK-LABEL: @is_pow2_ctpop_extra_uses( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 2 ; CHECK-NEXT: call void @use_i1(i1 [[CMP]]) ; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 @@ -213,6 +226,25 @@ define i1 @is_pow2_ctpop_extra_uses(i32 %x) { ret i1 %r } +define i1 @is_pow2_ctpop_extra_uses_logical(i32 %x) { +; CHECK-LABEL: @is_pow2_ctpop_extra_uses_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 2 +; CHECK-NEXT: call void @use_i1(i1 [[CMP]]) +; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: call void @use_i1(i1 [[NOTZERO]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[T0]], 1 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ult i32 %t0, 2 + call void @use_i1(i1 %cmp) + %notzero = icmp ne i32 %x, 0 + call void @use_i1(i1 %notzero) + %r = select i1 %notzero, i1 %cmp, i1 false + ret i1 %r +} + ; Test vector type and commuted 'and' operands. 
define <2 x i1> @is_pow2_ctpop_commute_vec(<2 x i8> %x) { @@ -232,7 +264,7 @@ define <2 x i1> @is_pow2_ctpop_commute_vec(<2 x i8> %x) { define i1 @is_pow2_ctpop_wrong_cmp_op1(i32 %x) { ; CHECK-LABEL: @is_pow2_ctpop_wrong_cmp_op1( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 3 ; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 ; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] @@ -245,11 +277,26 @@ define i1 @is_pow2_ctpop_wrong_cmp_op1(i32 %x) { ret i1 %r } +define i1 @is_pow2_ctpop_wrong_cmp_op1_logical(i32 %x) { +; CHECK-LABEL: @is_pow2_ctpop_wrong_cmp_op1_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 3 +; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ult i32 %t0, 3 + %notzero = icmp ne i32 %x, 0 + %r = select i1 %notzero, i1 %cmp, i1 false + ret i1 %r +} + ; Negative test - wrong constant. 
define i1 @is_pow2_ctpop_wrong_cmp_op2(i32 %x) { ; CHECK-LABEL: @is_pow2_ctpop_wrong_cmp_op2( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 2 ; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 1 ; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] @@ -262,11 +309,26 @@ define i1 @is_pow2_ctpop_wrong_cmp_op2(i32 %x) { ret i1 %r } +define i1 @is_pow2_ctpop_wrong_cmp_op2_logical(i32 %x) { +; CHECK-LABEL: @is_pow2_ctpop_wrong_cmp_op2_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 2 +; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ult i32 %t0, 2 + %notzero = icmp ne i32 %x, 1 + %r = select i1 %notzero, i1 %cmp, i1 false + ret i1 %r +} + ; Negative test - wrong predicate. 
define i1 @is_pow2_ctpop_wrong_pred1(i32 %x) { ; CHECK-LABEL: @is_pow2_ctpop_wrong_pred1( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 2 ; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 ; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] @@ -279,11 +341,26 @@ define i1 @is_pow2_ctpop_wrong_pred1(i32 %x) { ret i1 %r } +define i1 @is_pow2_ctpop_wrong_pred1_logical(i32 %x) { +; CHECK-LABEL: @is_pow2_ctpop_wrong_pred1_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 2 +; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ugt i32 %t0, 2 + %notzero = icmp ne i32 %x, 0 + %r = select i1 %notzero, i1 %cmp, i1 false + ret i1 %r +} + ; Negative test - wrong predicate. 
define i1 @is_pow2_ctpop_wrong_pred2(i32 %x) { ; CHECK-LABEL: @is_pow2_ctpop_wrong_pred2( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 2 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 0 ; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP2]], [[CMP]] @@ -296,11 +373,26 @@ define i1 @is_pow2_ctpop_wrong_pred2(i32 %x) { ret i1 %r } +define i1 @is_pow2_ctpop_wrong_pred2_logical(i32 %x) { +; CHECK-LABEL: @is_pow2_ctpop_wrong_pred2_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 2 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP2]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ult i32 %t0, 2 + %cmp2 = icmp sgt i32 %x, 0 + %r = select i1 %cmp2, i1 %cmp, i1 false + ret i1 %r +} + ; (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1 define i1 @isnot_pow2_ctpop(i32 %x) { ; CHECK-LABEL: @isnot_pow2_ctpop( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[T0]], 1 ; CHECK-NEXT: ret i1 [[TMP1]] ; @@ -311,11 +403,24 @@ define i1 @isnot_pow2_ctpop(i32 %x) { ret i1 %r } +define i1 @isnot_pow2_ctpop_logical(i32 %x) { +; CHECK-LABEL: @isnot_pow2_ctpop_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[T0]], 1 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ugt i32 %t0, 1 + %iszero = icmp eq i32 %x, 0 + %r = select i1 %iszero, i1 true, i1 %cmp + ret i1 %r +} + ; Extra uses don't change the fold. 
define i1 @isnot_pow2_ctpop_extra_uses(i32 %x) { ; CHECK-LABEL: @isnot_pow2_ctpop_extra_uses( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 1 ; CHECK-NEXT: call void @use_i1(i1 [[CMP]]) ; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 @@ -332,6 +437,25 @@ define i1 @isnot_pow2_ctpop_extra_uses(i32 %x) { ret i1 %r } +define i1 @isnot_pow2_ctpop_extra_uses_logical(i32 %x) { +; CHECK-LABEL: @isnot_pow2_ctpop_extra_uses_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 1 +; CHECK-NEXT: call void @use_i1(i1 [[CMP]]) +; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: call void @use_i1(i1 [[ISZERO]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[T0]], 1 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ugt i32 %t0, 1 + call void @use_i1(i1 %cmp) + %iszero = icmp eq i32 %x, 0 + call void @use_i1(i1 %iszero) + %r = select i1 %iszero, i1 true, i1 %cmp + ret i1 %r +} + ; Test vector type and commuted 'or' operands. 
define <2 x i1> @isnot_pow2_ctpop_commute_vec(<2 x i8> %x) { @@ -351,7 +475,7 @@ define <2 x i1> @isnot_pow2_ctpop_commute_vec(<2 x i8> %x) { define i1 @isnot_pow2_ctpop_wrong_cmp_op1(i32 %x) { ; CHECK-LABEL: @isnot_pow2_ctpop_wrong_cmp_op1( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 2 ; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 ; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] @@ -364,11 +488,26 @@ define i1 @isnot_pow2_ctpop_wrong_cmp_op1(i32 %x) { ret i1 %r } +define i1 @isnot_pow2_ctpop_wrong_cmp_op1_logical(i32 %x) { +; CHECK-LABEL: @isnot_pow2_ctpop_wrong_cmp_op1_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 2 +; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ugt i32 %t0, 2 + %iszero = icmp eq i32 %x, 0 + %r = select i1 %iszero, i1 true, i1 %cmp + ret i1 %r +} + ; Negative test - wrong constant. 
define i1 @isnot_pow2_ctpop_wrong_cmp_op2(i32 %x) { ; CHECK-LABEL: @isnot_pow2_ctpop_wrong_cmp_op2( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 1 ; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 1 ; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] @@ -381,11 +520,26 @@ define i1 @isnot_pow2_ctpop_wrong_cmp_op2(i32 %x) { ret i1 %r } +define i1 @isnot_pow2_ctpop_wrong_cmp_op2_logical(i32 %x) { +; CHECK-LABEL: @isnot_pow2_ctpop_wrong_cmp_op2_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 1 +; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ugt i32 %t0, 1 + %iszero = icmp eq i32 %x, 1 + %r = select i1 %iszero, i1 true, i1 %cmp + ret i1 %r +} + ; Negative test - wrong predicate (but this could reduce). 
define i1 @isnot_pow2_ctpop_wrong_pred1(i32 %x) { ; CHECK-LABEL: @isnot_pow2_ctpop_wrong_pred1( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T0]], 1 ; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 ; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] @@ -398,11 +552,26 @@ define i1 @isnot_pow2_ctpop_wrong_pred1(i32 %x) { ret i1 %r } +define i1 @isnot_pow2_ctpop_wrong_pred1_logical(i32 %x) { +; CHECK-LABEL: @isnot_pow2_ctpop_wrong_pred1_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T0]], 1 +; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp eq i32 %t0, 1 + %iszero = icmp eq i32 %x, 0 + %r = select i1 %iszero, i1 true, i1 %cmp + ret i1 %r +} + ; Negative test - wrong predicate. 
define i1 @isnot_pow2_ctpop_wrong_pred2(i32 %x) { ; CHECK-LABEL: @isnot_pow2_ctpop_wrong_pred2( -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], 0 ; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP2]], [[CMP]] @@ -415,9 +584,24 @@ define i1 @isnot_pow2_ctpop_wrong_pred2(i32 %x) { ret i1 %r } +define i1 @isnot_pow2_ctpop_wrong_pred2_logical(i32 %x) { +; CHECK-LABEL: @isnot_pow2_ctpop_wrong_pred2_logical( +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP2]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; + %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ugt i32 %t0, 1 + %cmp2 = icmp slt i32 %x, 0 + %r = select i1 %cmp2, i1 true, i1 %cmp + ret i1 %r +} + define i1 @is_pow2_negate_op(i32 %x) { ; CHECK-LABEL: @is_pow2_negate_op( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -429,6 +613,20 @@ define i1 @is_pow2_negate_op(i32 %x) { ret i1 %r } +define i1 @is_pow2_negate_op_logical(i32 %x) { +; CHECK-LABEL: @is_pow2_negate_op_logical( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %neg = sub i32 0, %x + %and = and i32 %neg, %x + %cmp = icmp eq i32 %and, %x + %notzero = icmp ne i32 %x, 0 + %r = select i1 %notzero, i1 %cmp, i1 false + ret i1 %r +} + define <2 x i1> @is_pow2_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @is_pow2_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x 
i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) @@ -445,7 +643,7 @@ define <2 x i1> @is_pow2_negate_op_vec(<2 x i32> %x) { define i1 @is_pow2_decrement_op(i8 %x) { ; CHECK-LABEL: @is_pow2_decrement_op( -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range !1 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), [[RNG1]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -457,6 +655,20 @@ define i1 @is_pow2_decrement_op(i8 %x) { ret i1 %r } +define i1 @is_pow2_decrement_op_logical(i8 %x) { +; CHECK-LABEL: @is_pow2_decrement_op_logical( +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), [[RNG1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %dec = add i8 %x, -1 + %and = and i8 %dec, %x + %cmp = icmp eq i8 %and, 0 + %notzero = icmp ne i8 %x, 0 + %r = select i1 %cmp, i1 %notzero, i1 false + ret i1 %r +} + define <2 x i1> @is_pow2_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) @@ -473,7 +685,7 @@ define <2 x i1> @is_pow2_decrement_op_vec(<2 x i8> %x) { define i1 @isnot_pow2_negate_op(i32 %x) { ; CHECK-LABEL: @isnot_pow2_negate_op( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -485,6 +697,20 @@ define i1 @isnot_pow2_negate_op(i32 %x) { ret i1 %r } +define i1 @isnot_pow2_negate_op_logical(i32 %x) { +; CHECK-LABEL: @isnot_pow2_negate_op_logical( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), [[RNG0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %neg = sub i32 0, %x + %and = and i32 %neg, %x + %cmp = icmp ne i32 %and, %x + %iszero = icmp eq i32 %x, 0 + %r = select i1 %cmp, i1 
true, i1 %iszero + ret i1 %r +} + define <2 x i1> @isnot_pow2_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @isnot_pow2_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) @@ -501,7 +727,7 @@ define <2 x i1> @isnot_pow2_negate_op_vec(<2 x i32> %x) { define i1 @isnot_pow2_decrement_op(i8 %x) { ; CHECK-LABEL: @isnot_pow2_decrement_op( -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), !range !1 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), [[RNG1]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -513,6 +739,20 @@ define i1 @isnot_pow2_decrement_op(i8 %x) { ret i1 %r } +define i1 @isnot_pow2_decrement_op_logical(i8 %x) { +; CHECK-LABEL: @isnot_pow2_decrement_op_logical( +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ctpop.i8(i8 [[X:%.*]]), [[RNG1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %dec = add i8 %x, -1 + %and = and i8 %dec, %x + %cmp = icmp ne i8 %and, 0 + %iszero = icmp eq i8 %x, 0 + %r = select i1 %iszero, i1 true, i1 %cmp + ret i1 %r +} + define <2 x i1> @isnot_pow2_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll index 2f448ce1c740fe..f67cb024c2e32e 100644 --- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll @@ -376,6 +376,18 @@ define i1 @bools(i1 %a, i1 %b, i1 %c) { ret i1 %or } +define i1 @bools_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @bools_logical( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %not = xor i1 %c, -1 + %and1 = select i1 %not, i1 %a, i1 false + %and2 = select i1 %c, i1 
%b, i1 false + %or = select i1 %and1, i1 true, i1 %and2 + ret i1 %or +} + ; Form a select if we know we can get replace 2 simple logic ops. define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) { @@ -394,6 +406,22 @@ define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) { ret i1 %xor } +define i1 @bools_multi_uses1_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @bools_multi_uses1_logical( +; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[C:%.*]], true +; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 [[A]] +; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[TMP1]], [[AND1]] +; CHECK-NEXT: ret i1 [[XOR]] +; + %not = xor i1 %c, -1 + %and1 = select i1 %not, i1 %a, i1 false + %and2 = select i1 %c, i1 %b, i1 false + %or = select i1 %and1, i1 true, i1 %and2 + %xor = xor i1 %or, %and1 + ret i1 %xor +} + ; Don't replace a cheap logic op with a potentially expensive select ; unless we can also eliminate one of the other original ops. @@ -411,6 +439,20 @@ define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) { ret i1 %and3 } +define i1 @bools_multi_uses2_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @bools_multi_uses2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %not = xor i1 %c, -1 + %and1 = select i1 %not, i1 %a, i1 false + %and2 = select i1 %c, i1 %b, i1 false + %or = select i1 %and1, i1 true, i1 %and2 + %add = add i1 %and1, %and2 + %and3 = select i1 %or, i1 %add, i1 false + ret i1 %and3 +} + define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) { ; CHECK-LABEL: @vec_of_bools( ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[B:%.*]], <4 x i1> [[A:%.*]] diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index 2f532be03fbfc3..5c16fc446cdda7 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ 
-376,6 +376,18 @@ define i1 @bools(i1 %a, i1 %b, i1 %c) { ret i1 %or } +define i1 @bools_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @bools_logical( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %not = xor i1 %c, -1 + %and1 = select i1 %not, i1 %a, i1 false + %and2 = select i1 %c, i1 %b, i1 false + %or = select i1 %and1, i1 true, i1 %and2 + ret i1 %or +} + ; Form a select if we know we can get replace 2 simple logic ops. define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) { @@ -394,6 +406,22 @@ define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) { ret i1 %xor } +define i1 @bools_multi_uses1_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @bools_multi_uses1_logical( +; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[C:%.*]], true +; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 [[A]] +; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[TMP1]], [[AND1]] +; CHECK-NEXT: ret i1 [[XOR]] +; + %not = xor i1 %c, -1 + %and1 = select i1 %not, i1 %a, i1 false + %and2 = select i1 %c, i1 %b, i1 false + %or = select i1 %and1, i1 true, i1 %and2 + %xor = xor i1 %or, %and1 + ret i1 %xor +} + ; Don't replace a cheap logic op with a potentially expensive select ; unless we can also eliminate one of the other original ops. 
@@ -411,6 +439,20 @@ define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) { ret i1 %and3 } +define i1 @bools_multi_uses2_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @bools_multi_uses2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %not = xor i1 %c, -1 + %and1 = select i1 %not, i1 %a, i1 false + %and2 = select i1 %c, i1 %b, i1 false + %or = select i1 %and1, i1 true, i1 %and2 + %add = add i1 %and1, %and2 + %and3 = select i1 %or, i1 %add, i1 false + ret i1 %and3 +} + define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) { ; CHECK-LABEL: @vec_of_bools( ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[B:%.*]], <4 x i1> [[A:%.*]] diff --git a/llvm/test/Transforms/InstCombine/merge-icmp.ll b/llvm/test/Transforms/InstCombine/merge-icmp.ll index 6a65b5befa38f5..e9f9bb31a0e4f1 100644 --- a/llvm/test/Transforms/InstCombine/merge-icmp.ll +++ b/llvm/test/Transforms/InstCombine/merge-icmp.ll @@ -1,6 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -instcombine < %s | FileCheck %s define i1 @test1(i16* %x) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[LOAD:%.*]] = load i16, i16* [[X:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[LOAD]], 17791 +; CHECK-NEXT: ret i1 [[TMP1]] +; %load = load i16, i16* %x, align 4 %trunc = trunc i16 %load to i8 %cmp1 = icmp eq i8 %trunc, 127 @@ -8,13 +14,29 @@ define i1 @test1(i16* %x) { %cmp2 = icmp eq i16 %and, 17664 %or = and i1 %cmp1, %cmp2 ret i1 %or -; CHECK-LABEL: @test1( -; CHECK-NEXT: load i16 -; CHECK-NEXT: icmp eq i16 %load, 17791 -; CHECK-NEXT: ret i1 +} + +define i1 @test1_logical(i16* %x) { +; CHECK-LABEL: @test1_logical( +; CHECK-NEXT: [[LOAD:%.*]] = load i16, i16* [[X:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[LOAD]], 17791 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %load = load i16, i16* %x, align 4 + %trunc = trunc i16 %load to i8 + %cmp1 = icmp eq i8 %trunc, 
127 + %and = and i16 %load, -256 + %cmp2 = icmp eq i16 %and, 17664 + %or = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %or } define i1 @test2(i16* %x) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[LOAD:%.*]] = load i16, i16* [[X:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[LOAD]], 32581 +; CHECK-NEXT: ret i1 [[TMP1]] +; %load = load i16, i16* %x, align 4 %and = and i16 %load, -256 %cmp1 = icmp eq i16 %and, 32512 @@ -22,8 +44,19 @@ define i1 @test2(i16* %x) { %cmp2 = icmp eq i8 %trunc, 69 %or = and i1 %cmp1, %cmp2 ret i1 %or -; CHECK-LABEL: @test2( -; CHECK-NEXT: load i16 -; CHECK-NEXT: icmp eq i16 %load, 32581 -; CHECK-NEXT: ret i1 +} + +define i1 @test2_logical(i16* %x) { +; CHECK-LABEL: @test2_logical( +; CHECK-NEXT: [[LOAD:%.*]] = load i16, i16* [[X:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[LOAD]], 32581 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %load = load i16, i16* %x, align 4 + %and = and i16 %load, -256 + %cmp1 = icmp eq i16 %and, 32512 + %trunc = trunc i16 %load to i8 + %cmp2 = icmp eq i8 %trunc, 69 + %or = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %or } diff --git a/llvm/test/Transforms/InstCombine/objsize-noverify.ll b/llvm/test/Transforms/InstCombine/objsize-noverify.ll index 7e469bd2528275..f1d0392c1845c2 100644 --- a/llvm/test/Transforms/InstCombine/objsize-noverify.ll +++ b/llvm/test/Transforms/InstCombine/objsize-noverify.ll @@ -24,6 +24,24 @@ return: ret i32 42 } +define i32 @PR13390_logical(i1 %bool, i8* %a) { +entry: + %cond = select i1 %bool, i1 true, i1 true + br i1 %cond, label %return, label %xpto + +xpto: + %select = select i1 %bool, i8* %select, i8* %a + %select2 = select i1 %bool, i8* %a, i8* %select2 + %0 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select, i1 true) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select2, i1 true) + %2 = add i32 %0, %1 +; CHECK: ret i32 undef + ret i32 %2 + +return: + ret i32 42 +} + ; CHECK-LABEL: @PR13621( define i32 @PR13621(i1 %bool) nounwind { entry: @@ -41,3 +59,20 @@ 
xpto: return: ret i32 7 } + +define i32 @PR13621_logical(i1 %bool) nounwind { +entry: + %cond = select i1 %bool, i1 true, i1 true + br i1 %cond, label %return, label %xpto + +; technically reachable, but this malformed IR may appear as a result of constant propagation +xpto: + %gep2 = getelementptr i8, i8* %gep, i32 1 + %gep = getelementptr i8, i8* %gep2, i32 1 + %o = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 true) +; CHECK: ret i32 undef + ret i32 %o + +return: + ret i32 7 +} diff --git a/llvm/test/Transforms/InstCombine/onehot_merge.ll b/llvm/test/Transforms/InstCombine/onehot_merge.ll index d98361f1b5f615..bc0047e7a84a51 100644 --- a/llvm/test/Transforms/InstCombine/onehot_merge.ll +++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll @@ -15,6 +15,20 @@ define i1 @and_consts(i32 %k, i32 %c1, i32 %c2) { ret i1 %or } +define i1 @and_consts_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @and_consts_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[K:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 12 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 4, %k + %t2 = icmp eq i32 %t1, 0 + %t5 = and i32 8, %k + %t6 = icmp eq i32 %t5, 0 + %or = select i1 %t2, i1 true, i1 %t6 + ret i1 %or +} + define <2 x i1> @and_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @and_consts_vector( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], @@ -48,6 +62,25 @@ define i1 @foo1_and(i32 %k, i32 %c1, i32 %c2) { ret i1 %or } +define i1 @foo1_and_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_and_logical( +; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T]], [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t = shl i32 1, %c1 + %t4 = shl i32 1, %c2 + %t1 = and i32 %t, %k + %t2 = icmp eq i32 %t1, 0 + %t5 = and i32 %t4, %k + %t6 
= icmp eq i32 %t5, 0 + %or = select i1 %t2, i1 true, i1 %t6 + ret i1 %or +} + define <2 x i1> @foo1_and_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_vector( ; CHECK-NEXT: [[T:%.*]] = shl <2 x i32> , [[C1:%.*]] @@ -89,6 +122,27 @@ define i1 @foo1_and_commuted(i32 %k, i32 %c1, i32 %c2) { ret i1 %or } +define i1 @foo1_and_commuted_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_and_commuted_logical( +; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]] +; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T]], [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[K2]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %k2 = mul i32 %k, %k ; to trick the complexity sorting + %t = shl i32 1, %c1 + %t4 = shl i32 1, %c2 + %t1 = and i32 %k2, %t + %t2 = icmp eq i32 %t1, 0 + %t5 = and i32 %t4, %k2 + %t6 = icmp eq i32 %t5, 0 + %or = select i1 %t2, i1 true, i1 %t6 + ret i1 %or +} + define <2 x i1> @foo1_and_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_commuted_vector( ; CHECK-NEXT: [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]] @@ -124,6 +178,20 @@ define i1 @or_consts(i32 %k, i32 %c1, i32 %c2) { ret i1 %or } +define i1 @or_consts_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @or_consts_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[K:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 12 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t1 = and i32 4, %k + %t2 = icmp ne i32 %t1, 0 + %t5 = and i32 8, %k + %t6 = icmp ne i32 %t5, 0 + %or = select i1 %t2, i1 %t6, i1 false + ret i1 %or +} + define <2 x i1> @or_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @or_consts_vector( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], @@ -157,6 +225,25 @@ define i1 @foo1_or(i32 %k, i32 %c1, i32 %c2) { ret i1 %or } +define i1 
@foo1_or_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_or_logical( +; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T]], [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t = shl i32 1, %c1 + %t4 = shl i32 1, %c2 + %t1 = and i32 %t, %k + %t2 = icmp ne i32 %t1, 0 + %t5 = and i32 %t4, %k + %t6 = icmp ne i32 %t5, 0 + %or = select i1 %t2, i1 %t6, i1 false + ret i1 %or +} + define <2 x i1> @foo1_or_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_vector( ; CHECK-NEXT: [[T:%.*]] = shl <2 x i32> , [[C1:%.*]] @@ -198,6 +285,27 @@ define i1 @foo1_or_commuted(i32 %k, i32 %c1, i32 %c2) { ret i1 %or } +define i1 @foo1_or_commuted_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_or_commuted_logical( +; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]] +; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T]], [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[K2]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %k2 = mul i32 %k, %k ; to trick the complexity sorting + %t = shl i32 1, %c1 + %t4 = shl i32 1, %c2 + %t1 = and i32 %k2, %t + %t2 = icmp ne i32 %t1, 0 + %t5 = and i32 %t4, %k2 + %t6 = icmp ne i32 %t5, 0 + %or = select i1 %t2, i1 %t6, i1 false + ret i1 %or +} + define <2 x i1> @foo1_or_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_commuted_vector( ; CHECK-NEXT: [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]] @@ -238,6 +346,25 @@ define i1 @foo1_and_signbit_lshr(i32 %k, i32 %c1, i32 %c2) { ret i1 %or } +define i1 @foo1_and_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_and_signbit_lshr_logical( +; CHECK-NEXT: [[T:%.*]] = shl i32 1, 
[[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = lshr i32 -2147483648, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T]], [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t = shl i32 1, %c1 + %t4 = lshr i32 -2147483648, %c2 + %t1 = and i32 %t, %k + %t2 = icmp eq i32 %t1, 0 + %t5 = and i32 %t4, %k + %t6 = icmp eq i32 %t5, 0 + %or = select i1 %t2, i1 true, i1 %t6 + ret i1 %or +} + define <2 x i1> @foo1_and_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_signbit_lshr_vector( ; CHECK-NEXT: [[T:%.*]] = shl <2 x i32> , [[C1:%.*]] @@ -276,6 +403,25 @@ define i1 @foo1_or_signbit_lshr(i32 %k, i32 %c1, i32 %c2) { ret i1 %or } +define i1 @foo1_or_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_or_signbit_lshr_logical( +; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = lshr i32 -2147483648, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T]], [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t = shl i32 1, %c1 + %t4 = lshr i32 -2147483648, %c2 + %t1 = and i32 %t, %k + %t2 = icmp ne i32 %t1, 0 + %t5 = and i32 %t4, %k + %t6 = icmp ne i32 %t5, 0 + %or = select i1 %t2, i1 %t6, i1 false + ret i1 %or +} + define <2 x i1> @foo1_or_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_signbit_lshr_vector( ; CHECK-NEXT: [[T:%.*]] = shl <2 x i32> , [[C1:%.*]] @@ -315,6 +461,25 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit(i32 %k, i32 %c1, i32 % ret i1 %or } +define i1 @foo1_and_signbit_lshr_without_shifting_signbit_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[K:%.*]] +; 
CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = icmp sgt i32 [[T3]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[OR]] +; + %t0 = shl i32 1, %c1 + %t1 = and i32 %t0, %k + %t2 = icmp eq i32 %t1, 0 + %t3 = shl i32 %k, %c2 + %t4 = icmp sgt i32 %t3, -1 + %or = select i1 %t2, i1 true, i1 %t4 + ret i1 %or +} + define i1 @foo1_or_signbit_lshr_without_shifting_signbit(i32 %k, i32 %c1, i32 %c2) { ; CHECK-LABEL: @foo1_or_signbit_lshr_without_shifting_signbit( ; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] @@ -334,6 +499,25 @@ define i1 @foo1_or_signbit_lshr_without_shifting_signbit(i32 %k, i32 %c1, i32 %c ret i1 %or } +define i1 @foo1_or_signbit_lshr_without_shifting_signbit_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_or_signbit_lshr_without_shifting_signbit_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = icmp slt i32 [[T3]], 0 +; CHECK-NEXT: [[OR:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[OR]] +; + %t0 = shl i32 1, %c1 + %t1 = and i32 %t0, %k + %t2 = icmp ne i32 %t1, 0 + %t3 = shl i32 %k, %c2 + %t4 = icmp slt i32 %t3, 0 + %or = select i1 %t2, i1 %t4, i1 false + ret i1 %or +} + ; Shift-of-signbit replaced with 'icmp s*' for both sides define i1 @foo1_and_signbit_lshr_without_shifting_signbit_both_sides(i32 %k, i32 %c1, i32 %c2) { ; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_both_sides( @@ -351,6 +535,22 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_both_sides(i32 %k, i32 ret i1 %or } +define i1 @foo1_and_signbit_lshr_without_shifting_signbit_both_sides_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_both_sides_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 
[[K:%.*]], [[C1:%.*]] +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[T0]], [[T2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t0 = shl i32 %k, %c1 + %t1 = icmp sgt i32 %t0, -1 + %t2 = shl i32 %k, %c2 + %t3 = icmp sgt i32 %t2, -1 + %or = select i1 %t1, i1 true, i1 %t3 + ret i1 %or +} + define i1 @foo1_or_signbit_lshr_without_shifting_signbit_both_sides(i32 %k, i32 %c1, i32 %c2) { ; CHECK-LABEL: @foo1_or_signbit_lshr_without_shifting_signbit_both_sides( ; CHECK-NEXT: [[T0:%.*]] = shl i32 [[K:%.*]], [[C1:%.*]] @@ -367,6 +567,22 @@ define i1 @foo1_or_signbit_lshr_without_shifting_signbit_both_sides(i32 %k, i32 ret i1 %or } +define i1 @foo1_or_signbit_lshr_without_shifting_signbit_both_sides_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_or_signbit_lshr_without_shifting_signbit_both_sides_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 [[K:%.*]], [[C1:%.*]] +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[T0]], [[T2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %t0 = shl i32 %k, %c1 + %t1 = icmp slt i32 %t0, 0 + %t2 = shl i32 %k, %c2 + %t3 = icmp slt i32 %t2, 0 + %or = select i1 %t1, i1 %t3, i1 false + ret i1 %or +} + ; Extra use ; Expect to fold @@ -391,6 +607,27 @@ define i1 @foo1_and_extra_use_shl(i32 %k, i32 %c1, i32 %c2, i32* %p) { ret i1 %or } +define i1 @foo1_and_extra_use_shl_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { +; CHECK-LABEL: @foo1_and_extra_use_shl_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: store i32 [[T0]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T0]], [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t0 = shl i32 1, %c1 + store i32 %t0, i32* %p 
; extra use of shl + %t1 = shl i32 1, %c2 + %t2 = and i32 %t0, %k + %t3 = icmp eq i32 %t2, 0 + %t4 = and i32 %t1, %k + %t5 = icmp eq i32 %t4, 0 + %or = select i1 %t3, i1 true, i1 %t5 + ret i1 %or +} + ; Should not fold define i1 @foo1_and_extra_use_and(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-LABEL: @foo1_and_extra_use_and( @@ -414,6 +651,28 @@ define i1 @foo1_and_extra_use_and(i32 %k, i32 %c1, i32 %c2, i32* %p) { ret i1 %or } +define i1 @foo1_and_extra_use_and_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { +; CHECK-LABEL: @foo1_and_extra_use_and_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[T2:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: store i32 [[T2]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T0]], [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t0 = shl i32 1, %c1 + %t1 = shl i32 1, %c2 + %t2 = and i32 %t0, %k + store i32 %t2, i32* %p ; extra use of and + %t3 = icmp eq i32 %t2, 0 + %t4 = and i32 %t1, %k + %t5 = icmp eq i32 %t4, 0 + %or = select i1 %t3, i1 true, i1 %t5 + ret i1 %or +} + ; Should not fold define i1 @foo1_and_extra_use_cmp(i32 %k, i32 %c1, i32 %c2, i1* %p) { ; CHECK-LABEL: @foo1_and_extra_use_cmp( @@ -438,6 +697,29 @@ define i1 @foo1_and_extra_use_cmp(i32 %k, i32 %c1, i32 %c2, i1* %p) { ret i1 %or } +define i1 @foo1_and_extra_use_cmp_logical(i32 %k, i32 %c1, i32 %c2, i1* %p) { +; CHECK-LABEL: @foo1_and_extra_use_cmp_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[T2:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0 +; CHECK-NEXT: store i1 [[T3]], i1* [[P:%.*]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T0]], [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 
[[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t0 = shl i32 1, %c1 + %t1 = shl i32 1, %c2 + %t2 = and i32 %t0, %k + %t3 = icmp eq i32 %t2, 0 + store i1 %t3, i1* %p ; extra use of cmp + %t4 = and i32 %t1, %k + %t5 = icmp eq i32 %t4, 0 + %or = select i1 %t3, i1 true, i1 %t5 + ret i1 %or +} + ; Expect to fold define i1 @foo1_and_extra_use_shl2(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-LABEL: @foo1_and_extra_use_shl2( @@ -460,6 +742,27 @@ define i1 @foo1_and_extra_use_shl2(i32 %k, i32 %c1, i32 %c2, i32* %p) { ret i1 %or } +define i1 @foo1_and_extra_use_shl2_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { +; CHECK-LABEL: @foo1_and_extra_use_shl2_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: store i32 [[T1]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T0]], [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t0 = shl i32 1, %c1 + %t1 = shl i32 1, %c2 + store i32 %t1, i32* %p ; extra use of shl + %t2 = and i32 %t0, %k + %t3 = icmp eq i32 %t2, 0 + %t4 = and i32 %t1, %k + %t5 = icmp eq i32 %t4, 0 + %or = select i1 %t3, i1 true, i1 %t5 + ret i1 %or +} + ; Should not fold define i1 @foo1_and_extra_use_and2(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-LABEL: @foo1_and_extra_use_and2( @@ -483,6 +786,28 @@ define i1 @foo1_and_extra_use_and2(i32 %k, i32 %c1, i32 %c2, i32* %p) { ret i1 %or } +define i1 @foo1_and_extra_use_and2_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { +; CHECK-LABEL: @foo1_and_extra_use_and2_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K:%.*]] +; CHECK-NEXT: store i32 [[T4]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T0]], [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp 
ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t0 = shl i32 1, %c1 + %t1 = shl i32 1, %c2 + %t2 = and i32 %t0, %k + %t3 = icmp eq i32 %t2, 0 + %t4 = and i32 %t1, %k + store i32 %t4, i32* %p ; extra use of and + %t5 = icmp eq i32 %t4, 0 + %or = select i1 %t3, i1 true, i1 %t5 + ret i1 %or +} + ; Should not fold define i1 @foo1_and_extra_use_cmp2(i32 %k, i32 %c1, i32 %c2, i1* %p) { ; CHECK-LABEL: @foo1_and_extra_use_cmp2( @@ -507,6 +832,29 @@ define i1 @foo1_and_extra_use_cmp2(i32 %k, i32 %c1, i32 %c2, i1* %p) { ret i1 %or } +define i1 @foo1_and_extra_use_cmp2_logical(i32 %k, i32 %c1, i32 %c2, i1* %p) { +; CHECK-LABEL: @foo1_and_extra_use_cmp2_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K:%.*]] +; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], 0 +; CHECK-NEXT: store i1 [[T5]], i1* [[P:%.*]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T0]], [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %t0 = shl i32 1, %c1 + %t1 = shl i32 1, %c2 + %t2 = and i32 %t0, %k + %t3 = icmp eq i32 %t2, 0 + %t4 = and i32 %t1, %k + %t5 = icmp eq i32 %t4, 0 + store i1 %t5, i1* %p ; extra use of cmp + %or = select i1 %t3, i1 true, i1 %t5 + ret i1 %or +} + ; Shift-of-signbit replaced with 'icmp s*' ; Expect to fold define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl1(i32 %k, i32 %c1, i32 %c2, i32* %p) { @@ -530,6 +878,27 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl1(i32 %k, ret i1 %or } +define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl1_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { +; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl1_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: store i32 [[T0]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: 
[[T1:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = icmp sgt i32 [[T3]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[OR]] +; + %t0 = shl i32 1, %c1 + store i32 %t0, i32* %p ; extra use of shl + %t1 = and i32 %t0, %k + %t2 = icmp eq i32 %t1, 0 + %t3 = shl i32 %k, %c2 + %t4 = icmp sgt i32 %t3, -1 + %or = select i1 %t2, i1 true, i1 %t4 + ret i1 %or +} + ; Not fold define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_and(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_and( @@ -552,6 +921,27 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_and(i32 %k, ret i1 %or } +define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_and_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { +; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_and_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: store i32 [[T1]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = icmp sgt i32 [[T3]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[OR]] +; + %t0 = shl i32 1, %c1 + %t1 = and i32 %t0, %k + store i32 %t1, i32* %p ; extra use of and + %t2 = icmp eq i32 %t1, 0 + %t3 = shl i32 %k, %c2 + %t4 = icmp sgt i32 %t3, -1 + %or = select i1 %t2, i1 true, i1 %t4 + ret i1 %or +} + ; Not fold define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp1(i32 %k, i32 %c1, i32 %c2, i1* %p) { ; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp1( @@ -574,6 +964,27 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp1(i32 %k, ret i1 %or } +define i1 
@foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp1_logical(i32 %k, i32 %c1, i32 %c2, i1* %p) { +; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp1_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: store i1 [[T2]], i1* [[P:%.*]], align 1 +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = icmp sgt i32 [[T3]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[OR]] +; + %t0 = shl i32 1, %c1 + %t1 = and i32 %t0, %k + %t2 = icmp eq i32 %t1, 0 + store i1 %t2, i1* %p ; extra use of cmp + %t3 = shl i32 %k, %c2 + %t4 = icmp sgt i32 %t3, -1 + %or = select i1 %t2, i1 true, i1 %t4 + ret i1 %or +} + ; Not fold define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl2(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl2( @@ -596,6 +1007,27 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl2(i32 %k, ret i1 %or } +define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl2_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { +; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_shl2_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: store i32 [[T3]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[T4:%.*]] = icmp sgt i32 [[T3]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[OR]] +; + %t0 = shl i32 1, %c1 + %t1 = and i32 %t0, %k + %t2 = icmp eq i32 %t1, 0 + %t3 = shl i32 %k, %c2 + store i32 %t3, i32* %p ; extra use of shl + %t4 = icmp sgt i32 %t3, -1 + %or = select i1 %t2, i1 true, i1 %t4 + ret i1 %or +} + ; Not fold define i1 
@foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp2(i32 %k, i32 %c1, i32 %c2, i1* %p) { ; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp2( @@ -618,6 +1050,27 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp2(i32 %k, ret i1 %or } +define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp2_logical(i32 %k, i32 %c1, i32 %c2, i1* %p) { +; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp2_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = icmp sgt i32 [[T3]], -1 +; CHECK-NEXT: store i1 [[T4]], i1* [[P:%.*]], align 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[OR]] +; + %t0 = shl i32 1, %c1 + %t1 = and i32 %t0, %k + %t2 = icmp eq i32 %t1, 0 + %t3 = shl i32 %k, %c2 + %t4 = icmp sgt i32 %t3, -1 + store i1 %t4, i1* %p ; extra use of cmp + %or = select i1 %t2, i1 true, i1 %t4 + ret i1 %or +} + ; Negative tests ; This test checks that we are not creating additional shift instruction when fold fails. 
@@ -639,3 +1092,22 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_not_pwr2(i32 %k, i32 % %or = or i1 %t2, %t4 ret i1 %or } + +define i1 @foo1_and_signbit_lshr_without_shifting_signbit_not_pwr2_logical(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_not_pwr2_logical( +; CHECK-NEXT: [[T0:%.*]] = shl i32 3, [[C1:%.*]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[K:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]] +; CHECK-NEXT: [[T4:%.*]] = icmp sgt i32 [[T3]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[OR]] +; + %t0 = shl i32 3, %c1 + %t1 = and i32 %t0, %k + %t2 = icmp eq i32 %t1, 0 + %t3 = shl i32 %k, %c2 + %t4 = icmp sgt i32 %t3, -1 + %or = select i1 %t2, i1 true, i1 %t4 + ret i1 %or +} diff --git a/llvm/test/Transforms/InstCombine/or-fcmp.ll b/llvm/test/Transforms/InstCombine/or-fcmp.ll index 10ac51ae32bc3f..da12ddf668c4ff 100644 --- a/llvm/test/Transforms/InstCombine/or-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/or-fcmp.ll @@ -12,6 +12,17 @@ define i1 @PR1738(double %x, double %y) { ret i1 %or } +define i1 @PR1738_logical(double %x, double %y) { +; CHECK-LABEL: @PR1738_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp1 = fcmp uno double %x, 0.0 + %cmp2 = fcmp uno double %y, 0.0 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + define <2 x i1> @PR1738_vec_undef(<2 x double> %x, <2 x double> %y) { ; CHECK-LABEL: @PR1738_vec_undef( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x double> [[X:%.*]], [[Y:%.*]] @@ -38,6 +49,21 @@ define i1 @PR41069(double %a, double %b, double %c, double %d) { ret i1 %r } +define i1 @PR41069_logical(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @PR41069_logical( +; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]] +; 
CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]] +; CHECK-NEXT: ret i1 [[R]] +; + %uno1 = fcmp uno double %a, %b + %uno2 = fcmp uno double %c, 0.0 + %or = select i1 %uno1, i1 true, i1 %uno2 + %uno3 = fcmp uno double %d, 0.0 + %r = select i1 %or, i1 true, i1 %uno3 + ret i1 %r +} + define i1 @PR41069_commute(double %a, double %b, double %c, double %d) { ; CHECK-LABEL: @PR41069_commute( ; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -53,6 +79,21 @@ define i1 @PR41069_commute(double %a, double %b, double %c, double %d) { ret i1 %r } +define i1 @PR41069_commute_logical(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @PR41069_commute_logical( +; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]] +; CHECK-NEXT: ret i1 [[R]] +; + %uno1 = fcmp uno double %a, %b + %uno2 = fcmp uno double %c, 0.0 + %or = select i1 %uno1, i1 true, i1 %uno2 + %uno3 = fcmp uno double %d, 0.0 + %r = select i1 %uno3, i1 true, i1 %or + ret i1 %r +} + define <2 x i1> @PR41069_vec(<2 x i1> %z, <2 x float> %c, <2 x float> %d) { ; CHECK-LABEL: @PR41069_vec( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x float> [[D:%.*]], [[C:%.*]] @@ -90,6 +131,17 @@ define i1 @fcmp_uno_nonzero(float %x, float %y) { ret i1 %or } +define i1 @fcmp_uno_nonzero_logical(float %x, float %y) { +; CHECK-LABEL: @fcmp_uno_nonzero_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp1 = fcmp uno float %x, 1.0 + %cmp2 = fcmp uno float %y, 2.0 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + define <3 x i1> @fcmp_uno_nonzero_vec(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: @fcmp_uno_nonzero_vec( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <3 x float> [[X:%.*]], [[Y:%.*]] @@ -111,6 +163,16 @@ define i1 @auto_gen_0(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_0_logical(double %a, 
double %b) { +; CHECK-LABEL: @auto_gen_0_logical( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp false double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_1(double %a, double %b) { ; CHECK-LABEL: @auto_gen_1( ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -122,6 +184,17 @@ define i1 @auto_gen_1(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_1_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_1_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp oeq double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_2(double %a, double %b) { ; CHECK-LABEL: @auto_gen_2( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] @@ -133,6 +206,17 @@ define i1 @auto_gen_2(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_2_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp oeq double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_3(double %a, double %b) { ; CHECK-LABEL: @auto_gen_3( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -144,6 +228,17 @@ define i1 @auto_gen_3(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_3_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_3_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp ogt double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_4(double %a, double %b) { ; CHECK-LABEL: @auto_gen_4( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], 
[[B:%.*]] @@ -155,6 +250,17 @@ define i1 @auto_gen_4(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_4_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_4_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ogt double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_5(double %a, double %b) { ; CHECK-LABEL: @auto_gen_5( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] @@ -166,6 +272,17 @@ define i1 @auto_gen_5(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_5_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_5_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ogt double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_6(double %a, double %b) { ; CHECK-LABEL: @auto_gen_6( ; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] @@ -177,6 +294,17 @@ define i1 @auto_gen_6(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_6_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_6_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp oge double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_7(double %a, double %b) { ; CHECK-LABEL: @auto_gen_7( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] @@ -188,6 +316,17 @@ define i1 @auto_gen_7(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_7_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_7_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp oge double %a, %b + %cmp1 = fcmp oeq double %a, 
%b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_8(double %a, double %b) { ; CHECK-LABEL: @auto_gen_8( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] @@ -199,6 +338,17 @@ define i1 @auto_gen_8(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_8_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_8_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp oge double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_9(double %a, double %b) { ; CHECK-LABEL: @auto_gen_9( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] @@ -210,6 +360,17 @@ define i1 @auto_gen_9(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_9_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_9_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp oge double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_10(double %a, double %b) { ; CHECK-LABEL: @auto_gen_10( ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -221,6 +382,17 @@ define i1 @auto_gen_10(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_10_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_10_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_11(double %a, double %b) { ; CHECK-LABEL: @auto_gen_11( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -232,6 +404,17 @@ define i1 @auto_gen_11(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_11_logical(double %a, 
double %b) { +; CHECK-LABEL: @auto_gen_11_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_12(double %a, double %b) { ; CHECK-LABEL: @auto_gen_12( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -243,6 +426,17 @@ define i1 @auto_gen_12(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_12_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_12_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_13(double %a, double %b) { ; CHECK-LABEL: @auto_gen_13( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -254,6 +448,17 @@ define i1 @auto_gen_13(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_13_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_13_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_14(double %a, double %b) { ; CHECK-LABEL: @auto_gen_14( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] @@ -265,6 +470,17 @@ define i1 @auto_gen_14(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_14_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_14_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp olt double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_15(double %a, double %b) { ; 
CHECK-LABEL: @auto_gen_15( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -276,6 +492,17 @@ define i1 @auto_gen_15(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_15_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_15_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_16(double %a, double %b) { ; CHECK-LABEL: @auto_gen_16( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -287,6 +514,17 @@ define i1 @auto_gen_16(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_16_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_16_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_17(double %a, double %b) { ; CHECK-LABEL: @auto_gen_17( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -298,6 +536,17 @@ define i1 @auto_gen_17(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_17_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_17_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_18(double %a, double %b) { ; CHECK-LABEL: @auto_gen_18( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -309,6 +558,17 @@ define i1 @auto_gen_18(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_18_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_18_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], 
[[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_19(double %a, double %b) { ; CHECK-LABEL: @auto_gen_19( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -320,6 +580,17 @@ define i1 @auto_gen_19(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_19_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_19_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_20(double %a, double %b) { ; CHECK-LABEL: @auto_gen_20( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] @@ -331,6 +602,17 @@ define i1 @auto_gen_20(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_20_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_20_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ole double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_21(double %a, double %b) { ; CHECK-LABEL: @auto_gen_21( ; CHECK-NEXT: [[CMP:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -342,6 +624,17 @@ define i1 @auto_gen_21(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_21_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_21_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_22(double %a, double %b) { ; CHECK-LABEL: @auto_gen_22( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -353,6 
+646,17 @@ define i1 @auto_gen_22(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_22_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_22_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_23(double %a, double %b) { ; CHECK-LABEL: @auto_gen_23( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -364,6 +668,17 @@ define i1 @auto_gen_23(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_23_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_23_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_24(double %a, double %b) { ; CHECK-LABEL: @auto_gen_24( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -375,6 +690,17 @@ define i1 @auto_gen_24(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_24_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_24_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_25(double %a, double %b) { ; CHECK-LABEL: @auto_gen_25( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -386,6 +712,17 @@ define i1 @auto_gen_25(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_25_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_25_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp olt double %a, %b 
+ %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_26(double %a, double %b) { ; CHECK-LABEL: @auto_gen_26( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -397,6 +734,17 @@ define i1 @auto_gen_26(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_26_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_26_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_27(double %a, double %b) { ; CHECK-LABEL: @auto_gen_27( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] @@ -408,9 +756,20 @@ define i1 @auto_gen_27(double %a, double %b) { ret i1 %retval } -define i1 @auto_gen_28(double %a, double %b) { -; CHECK-LABEL: @auto_gen_28( -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +define i1 @auto_gen_27_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_27_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp one double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + +define i1 @auto_gen_28(double %a, double %b) { +; CHECK-LABEL: @auto_gen_28( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[CMP]] ; %cmp = fcmp ord double %a, %b @@ -419,6 +778,17 @@ define i1 @auto_gen_28(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_28_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_28_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_29(double %a, double %b) { ; 
CHECK-LABEL: @auto_gen_29( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -430,6 +800,17 @@ define i1 @auto_gen_29(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_29_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_29_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_30(double %a, double %b) { ; CHECK-LABEL: @auto_gen_30( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -441,6 +822,17 @@ define i1 @auto_gen_30(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_30_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_30_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_31(double %a, double %b) { ; CHECK-LABEL: @auto_gen_31( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -452,6 +844,17 @@ define i1 @auto_gen_31(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_31_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_31_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_32(double %a, double %b) { ; CHECK-LABEL: @auto_gen_32( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -463,6 +866,17 @@ define i1 @auto_gen_32(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_32_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_32_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], 
[[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_33(double %a, double %b) { ; CHECK-LABEL: @auto_gen_33( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -474,6 +888,17 @@ define i1 @auto_gen_33(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_33_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_33_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_34(double %a, double %b) { ; CHECK-LABEL: @auto_gen_34( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -485,6 +910,17 @@ define i1 @auto_gen_34(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_34_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_34_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_35(double %a, double %b) { ; CHECK-LABEL: @auto_gen_35( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] @@ -496,6 +932,17 @@ define i1 @auto_gen_35(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_35_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_35_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ord double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_36(double %a, double %b) { ; CHECK-LABEL: @auto_gen_36( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -507,6 
+954,17 @@ define i1 @auto_gen_36(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_36_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_36_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_37(double %a, double %b) { ; CHECK-LABEL: @auto_gen_37( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -518,6 +976,17 @@ define i1 @auto_gen_37(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_37_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_37_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_38(double %a, double %b) { ; CHECK-LABEL: @auto_gen_38( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -529,6 +998,17 @@ define i1 @auto_gen_38(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_38_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_38_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_39(double %a, double %b) { ; CHECK-LABEL: @auto_gen_39( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -540,6 +1020,17 @@ define i1 @auto_gen_39(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_39_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_39_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp oge double %a, 
%b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_40(double %a, double %b) { ; CHECK-LABEL: @auto_gen_40( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -551,6 +1042,17 @@ define i1 @auto_gen_40(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_40_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_40_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_41(double %a, double %b) { ; CHECK-LABEL: @auto_gen_41( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -562,6 +1064,17 @@ define i1 @auto_gen_41(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_41_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_41_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_42(double %a, double %b) { ; CHECK-LABEL: @auto_gen_42( ; CHECK-NEXT: ret i1 true @@ -572,6 +1085,16 @@ define i1 @auto_gen_42(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_42_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_42_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_43(double %a, double %b) { ; CHECK-LABEL: @auto_gen_43( ; CHECK-NEXT: ret i1 true @@ -582,6 +1105,16 @@ define i1 @auto_gen_43(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_43_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_43_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp ord double 
%a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_44(double %a, double %b) { ; CHECK-LABEL: @auto_gen_44( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -593,6 +1126,17 @@ define i1 @auto_gen_44(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_44_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_44_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ueq double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_45(double %a, double %b) { ; CHECK-LABEL: @auto_gen_45( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -604,6 +1148,17 @@ define i1 @auto_gen_45(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_45_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_45_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_46(double %a, double %b) { ; CHECK-LABEL: @auto_gen_46( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -615,6 +1170,17 @@ define i1 @auto_gen_46(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_46_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_46_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_47(double %a, double %b) { ; CHECK-LABEL: @auto_gen_47( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -626,6 +1192,17 @@ define i1 @auto_gen_47(double %a, double %b) { ret i1 %retval } +define i1 
@auto_gen_47_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_47_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_48(double %a, double %b) { ; CHECK-LABEL: @auto_gen_48( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -637,6 +1214,17 @@ define i1 @auto_gen_48(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_48_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_48_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_49(double %a, double %b) { ; CHECK-LABEL: @auto_gen_49( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -648,6 +1236,17 @@ define i1 @auto_gen_49(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_49_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_49_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_50(double %a, double %b) { ; CHECK-LABEL: @auto_gen_50( ; CHECK-NEXT: ret i1 true @@ -658,6 +1257,16 @@ define i1 @auto_gen_50(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_50_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_50_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_51(double %a, double %b) { ; CHECK-LABEL: @auto_gen_51( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double 
[[A:%.*]], [[B:%.*]] @@ -669,6 +1278,17 @@ define i1 @auto_gen_51(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_51_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_51_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_52(double %a, double %b) { ; CHECK-LABEL: @auto_gen_52( ; CHECK-NEXT: ret i1 true @@ -679,6 +1299,16 @@ define i1 @auto_gen_52(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_52_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_52_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_53(double %a, double %b) { ; CHECK-LABEL: @auto_gen_53( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -690,6 +1320,17 @@ define i1 @auto_gen_53(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_53_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_53_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_54(double %a, double %b) { ; CHECK-LABEL: @auto_gen_54( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -701,6 +1342,17 @@ define i1 @auto_gen_54(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_54_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_54_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ugt double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define 
i1 @auto_gen_55(double %a, double %b) { ; CHECK-LABEL: @auto_gen_55( ; CHECK-NEXT: [[CMP:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -712,6 +1364,17 @@ define i1 @auto_gen_55(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_55_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_55_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_56(double %a, double %b) { ; CHECK-LABEL: @auto_gen_56( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -723,6 +1386,17 @@ define i1 @auto_gen_56(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_56_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_56_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_57(double %a, double %b) { ; CHECK-LABEL: @auto_gen_57( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -734,6 +1408,17 @@ define i1 @auto_gen_57(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_57_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_57_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_58(double %a, double %b) { ; CHECK-LABEL: @auto_gen_58( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -745,6 +1430,17 @@ define i1 @auto_gen_58(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_58_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_58_logical( +; CHECK-NEXT: 
[[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_59(double %a, double %b) { ; CHECK-LABEL: @auto_gen_59( ; CHECK-NEXT: ret i1 true @@ -755,6 +1451,16 @@ define i1 @auto_gen_59(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_59_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_59_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_60(double %a, double %b) { ; CHECK-LABEL: @auto_gen_60( ; CHECK-NEXT: ret i1 true @@ -765,6 +1471,16 @@ define i1 @auto_gen_60(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_60_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_60_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_61(double %a, double %b) { ; CHECK-LABEL: @auto_gen_61( ; CHECK-NEXT: ret i1 true @@ -775,6 +1491,16 @@ define i1 @auto_gen_61(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_61_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_61_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_62(double %a, double %b) { ; CHECK-LABEL: @auto_gen_62( ; CHECK-NEXT: ret i1 true @@ -785,6 +1511,16 @@ define i1 @auto_gen_62(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_62_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_62_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + 
define i1 @auto_gen_63(double %a, double %b) { ; CHECK-LABEL: @auto_gen_63( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -796,6 +1532,17 @@ define i1 @auto_gen_63(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_63_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_63_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_64(double %a, double %b) { ; CHECK-LABEL: @auto_gen_64( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -807,6 +1554,17 @@ define i1 @auto_gen_64(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_64_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_64_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_65(double %a, double %b) { ; CHECK-LABEL: @auto_gen_65( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -818,6 +1576,17 @@ define i1 @auto_gen_65(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_65_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_65_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uge double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_66(double %a, double %b) { ; CHECK-LABEL: @auto_gen_66( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -829,25 +1598,58 @@ define i1 @auto_gen_66(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_66_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_66_logical( +; 
CHECK-NEXT: [[CMP:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_67(double %a, double %b) { ; CHECK-LABEL: @auto_gen_67( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[TMP1]] ; %cmp = fcmp ult double %a, %b - %cmp1 = fcmp oeq double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = or i1 %cmp, %cmp1 + ret i1 %retval +} + +define i1 @auto_gen_67_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_67_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + +define i1 @auto_gen_68(double %a, double %b) { +; CHECK-LABEL: @auto_gen_68( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ogt double %a, %b %retval = or i1 %cmp, %cmp1 ret i1 %retval } -define i1 @auto_gen_68(double %a, double %b) { -; CHECK-LABEL: @auto_gen_68( +define i1 @auto_gen_68_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_68_logical( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[TMP1]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ogt double %a, %b - %retval = or i1 %cmp, %cmp1 + %retval = select i1 %cmp, i1 true, i1 %cmp1 ret i1 %retval } @@ -861,6 +1663,16 @@ define i1 @auto_gen_69(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_69_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_69_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_70(double %a, double %b) { ; CHECK-LABEL: 
@auto_gen_70( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -872,6 +1684,17 @@ define i1 @auto_gen_70(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_70_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_70_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_71(double %a, double %b) { ; CHECK-LABEL: @auto_gen_71( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -883,6 +1706,17 @@ define i1 @auto_gen_71(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_71_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_71_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_72(double %a, double %b) { ; CHECK-LABEL: @auto_gen_72( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -894,6 +1728,17 @@ define i1 @auto_gen_72(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_72_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_72_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_73(double %a, double %b) { ; CHECK-LABEL: @auto_gen_73( ; CHECK-NEXT: ret i1 true @@ -904,6 +1749,16 @@ define i1 @auto_gen_73(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_73_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_73_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = 
select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_74(double %a, double %b) { ; CHECK-LABEL: @auto_gen_74( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -915,6 +1770,17 @@ define i1 @auto_gen_74(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_74_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_74_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_75(double %a, double %b) { ; CHECK-LABEL: @auto_gen_75( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -926,6 +1792,17 @@ define i1 @auto_gen_75(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_75_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_75_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_76(double %a, double %b) { ; CHECK-LABEL: @auto_gen_76( ; CHECK-NEXT: ret i1 true @@ -936,6 +1813,16 @@ define i1 @auto_gen_76(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_76_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_76_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_77(double %a, double %b) { ; CHECK-LABEL: @auto_gen_77( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -947,6 +1834,17 @@ define i1 @auto_gen_77(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_77_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_77_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], 
[[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ult double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_78(double %a, double %b) { ; CHECK-LABEL: @auto_gen_78( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -958,6 +1856,17 @@ define i1 @auto_gen_78(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_78_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_78_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_79(double %a, double %b) { ; CHECK-LABEL: @auto_gen_79( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -969,6 +1878,17 @@ define i1 @auto_gen_79(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_79_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_79_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_80(double %a, double %b) { ; CHECK-LABEL: @auto_gen_80( ; CHECK-NEXT: ret i1 true @@ -979,6 +1899,16 @@ define i1 @auto_gen_80(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_80_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_80_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_81(double %a, double %b) { ; CHECK-LABEL: @auto_gen_81( ; CHECK-NEXT: ret i1 true @@ -989,6 +1919,16 @@ define i1 @auto_gen_81(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_81_logical(double %a, double %b) { +; CHECK-LABEL: 
@auto_gen_81_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_82(double %a, double %b) { ; CHECK-LABEL: @auto_gen_82( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1000,6 +1940,17 @@ define i1 @auto_gen_82(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_82_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_82_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_83(double %a, double %b) { ; CHECK-LABEL: @auto_gen_83( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1011,6 +1962,17 @@ define i1 @auto_gen_83(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_83_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_83_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_84(double %a, double %b) { ; CHECK-LABEL: @auto_gen_84( ; CHECK-NEXT: ret i1 true @@ -1021,6 +1983,16 @@ define i1 @auto_gen_84(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_84_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_84_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_85(double %a, double %b) { ; CHECK-LABEL: @auto_gen_85( ; CHECK-NEXT: ret i1 true @@ -1031,6 +2003,16 @@ define i1 @auto_gen_85(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_85_logical(double %a, double %b) { +; 
CHECK-LABEL: @auto_gen_85_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_86(double %a, double %b) { ; CHECK-LABEL: @auto_gen_86( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1042,6 +2024,17 @@ define i1 @auto_gen_86(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_86_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_86_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_87(double %a, double %b) { ; CHECK-LABEL: @auto_gen_87( ; CHECK-NEXT: ret i1 true @@ -1052,6 +2045,16 @@ define i1 @auto_gen_87(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_87_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_87_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_88(double %a, double %b) { ; CHECK-LABEL: @auto_gen_88( ; CHECK-NEXT: ret i1 true @@ -1062,6 +2065,16 @@ define i1 @auto_gen_88(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_88_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_88_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_89(double %a, double %b) { ; CHECK-LABEL: @auto_gen_89( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1073,6 +2086,17 @@ define i1 @auto_gen_89(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_89_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_89_logical( +; CHECK-NEXT: [[TMP1:%.*]] = 
fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_90(double %a, double %b) { ; CHECK-LABEL: @auto_gen_90( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1084,6 +2108,17 @@ define i1 @auto_gen_90(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_90_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_90_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp ule double %a, %b + %cmp1 = fcmp ule double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_91(double %a, double %b) { ; CHECK-LABEL: @auto_gen_91( ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1095,6 +2130,17 @@ define i1 @auto_gen_91(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_91_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_91_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_92(double %a, double %b) { ; CHECK-LABEL: @auto_gen_92( ; CHECK-NEXT: ret i1 true @@ -1105,6 +2151,16 @@ define i1 @auto_gen_92(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_92_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_92_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_93(double %a, double %b) { ; CHECK-LABEL: @auto_gen_93( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1116,6 +2172,17 @@ define i1 @auto_gen_93(double %a, double %b) { ret i1 %retval } +define 
i1 @auto_gen_93_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_93_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_94(double %a, double %b) { ; CHECK-LABEL: @auto_gen_94( ; CHECK-NEXT: ret i1 true @@ -1126,6 +2193,16 @@ define i1 @auto_gen_94(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_94_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_94_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_95(double %a, double %b) { ; CHECK-LABEL: @auto_gen_95( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1137,6 +2214,17 @@ define i1 @auto_gen_95(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_95_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_95_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_96(double %a, double %b) { ; CHECK-LABEL: @auto_gen_96( ; CHECK-NEXT: ret i1 true @@ -1147,6 +2235,16 @@ define i1 @auto_gen_96(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_96_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_96_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_97(double %a, double %b) { ; CHECK-LABEL: @auto_gen_97( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1158,6 +2256,17 @@ define i1 @auto_gen_97(double %a, double %b) { ret i1 %retval } 
+define i1 @auto_gen_97_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_97_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_98(double %a, double %b) { ; CHECK-LABEL: @auto_gen_98( ; CHECK-NEXT: ret i1 true @@ -1168,6 +2277,16 @@ define i1 @auto_gen_98(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_98_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_98_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_99(double %a, double %b) { ; CHECK-LABEL: @auto_gen_99( ; CHECK-NEXT: ret i1 true @@ -1178,6 +2297,16 @@ define i1 @auto_gen_99(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_99_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_99_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_100(double %a, double %b) { ; CHECK-LABEL: @auto_gen_100( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1189,13 +2318,34 @@ define i1 @auto_gen_100(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_100_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_100_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_101(double %a, double %b) { ; CHECK-LABEL: @auto_gen_101( ; CHECK-NEXT: ret i1 true ; %cmp = fcmp une double %a, %b %cmp1 = fcmp uge double %a, %b - %retval = or i1 %cmp, %cmp1 + %retval = or i1 
%cmp, %cmp1 + ret i1 %retval +} + +define i1 @auto_gen_101_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_101_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 ret i1 %retval } @@ -1210,6 +2360,17 @@ define i1 @auto_gen_102(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_102_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_102_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_103(double %a, double %b) { ; CHECK-LABEL: @auto_gen_103( ; CHECK-NEXT: ret i1 true @@ -1220,6 +2381,16 @@ define i1 @auto_gen_103(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_103_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_103_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp ule double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_104(double %a, double %b) { ; CHECK-LABEL: @auto_gen_104( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1231,6 +2402,17 @@ define i1 @auto_gen_104(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_104_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_104_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp une double %a, %b + %cmp1 = fcmp une double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_105(double %a, double %b) { ; CHECK-LABEL: @auto_gen_105( ; CHECK-NEXT: [[CMP:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1242,6 +2424,17 @@ define i1 @auto_gen_105(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_105_logical(double %a, 
double %b) { +; CHECK-LABEL: @auto_gen_105_logical( +; CHECK-NEXT: [[CMP:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_106(double %a, double %b) { ; CHECK-LABEL: @auto_gen_106( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -1253,6 +2446,17 @@ define i1 @auto_gen_106(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_106_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_106_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_107(double %a, double %b) { ; CHECK-LABEL: @auto_gen_107( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -1264,6 +2468,17 @@ define i1 @auto_gen_107(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_107_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_107_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_108(double %a, double %b) { ; CHECK-LABEL: @auto_gen_108( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -1275,6 +2490,17 @@ define i1 @auto_gen_108(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_108_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_108_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 
@auto_gen_109(double %a, double %b) { ; CHECK-LABEL: @auto_gen_109( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -1286,6 +2512,17 @@ define i1 @auto_gen_109(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_109_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_109_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_110(double %a, double %b) { ; CHECK-LABEL: @auto_gen_110( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1297,6 +2534,17 @@ define i1 @auto_gen_110(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_110_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_110_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_111(double %a, double %b) { ; CHECK-LABEL: @auto_gen_111( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1308,6 +2556,17 @@ define i1 @auto_gen_111(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_111_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_111_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_112(double %a, double %b) { ; CHECK-LABEL: @auto_gen_112( ; CHECK-NEXT: ret i1 true @@ -1318,6 +2577,16 @@ define i1 @auto_gen_112(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_112_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_112_logical( +; CHECK-NEXT: ret i1 true +; + 
%cmp = fcmp uno double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_113(double %a, double %b) { ; CHECK-LABEL: @auto_gen_113( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] @@ -1329,6 +2598,17 @@ define i1 @auto_gen_113(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_113_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_113_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_114(double %a, double %b) { ; CHECK-LABEL: @auto_gen_114( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] @@ -1340,6 +2620,17 @@ define i1 @auto_gen_114(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_114_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_114_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_115(double %a, double %b) { ; CHECK-LABEL: @auto_gen_115( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] @@ -1351,6 +2642,17 @@ define i1 @auto_gen_115(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_115_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_115_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_116(double %a, double %b) { ; CHECK-LABEL: @auto_gen_116( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] @@ -1362,6 +2664,17 @@ define i1 
@auto_gen_116(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_116_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_116_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_117(double %a, double %b) { ; CHECK-LABEL: @auto_gen_117( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] @@ -1373,6 +2686,17 @@ define i1 @auto_gen_117(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_117_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_117_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp ule double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_118(double %a, double %b) { ; CHECK-LABEL: @auto_gen_118( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] @@ -1384,6 +2708,17 @@ define i1 @auto_gen_118(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_118_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_118_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp une double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_119(double %a, double %b) { ; CHECK-LABEL: @auto_gen_119( ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] @@ -1395,6 +2730,17 @@ define i1 @auto_gen_119(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_119_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_119_logical( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %cmp = fcmp uno double %a, %b + %cmp1 = fcmp uno double %a, 
%b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_120(double %a, double %b) { ; CHECK-LABEL: @auto_gen_120( ; CHECK-NEXT: ret i1 true @@ -1405,6 +2751,16 @@ define i1 @auto_gen_120(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_120_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_120_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp false double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_121(double %a, double %b) { ; CHECK-LABEL: @auto_gen_121( ; CHECK-NEXT: ret i1 true @@ -1415,6 +2771,16 @@ define i1 @auto_gen_121(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_121_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_121_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp oeq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_122(double %a, double %b) { ; CHECK-LABEL: @auto_gen_122( ; CHECK-NEXT: ret i1 true @@ -1425,6 +2791,16 @@ define i1 @auto_gen_122(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_122_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_122_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ogt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_123(double %a, double %b) { ; CHECK-LABEL: @auto_gen_123( ; CHECK-NEXT: ret i1 true @@ -1435,6 +2811,16 @@ define i1 @auto_gen_123(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_123_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_123_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp oge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_124(double %a, double %b) { ; CHECK-LABEL: @auto_gen_124( ; CHECK-NEXT: ret i1 true @@ -1445,6 
+2831,16 @@ define i1 @auto_gen_124(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_124_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_124_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp olt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_125(double %a, double %b) { ; CHECK-LABEL: @auto_gen_125( ; CHECK-NEXT: ret i1 true @@ -1455,6 +2851,16 @@ define i1 @auto_gen_125(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_125_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_125_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ole double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_126(double %a, double %b) { ; CHECK-LABEL: @auto_gen_126( ; CHECK-NEXT: ret i1 true @@ -1465,6 +2871,16 @@ define i1 @auto_gen_126(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_126_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_126_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp one double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_127(double %a, double %b) { ; CHECK-LABEL: @auto_gen_127( ; CHECK-NEXT: ret i1 true @@ -1475,6 +2891,16 @@ define i1 @auto_gen_127(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_127_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_127_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ord double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_128(double %a, double %b) { ; CHECK-LABEL: @auto_gen_128( ; CHECK-NEXT: ret i1 true @@ -1485,6 +2911,16 @@ define i1 @auto_gen_128(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_128_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_128_logical( +; CHECK-NEXT: ret 
i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ueq double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_129(double %a, double %b) { ; CHECK-LABEL: @auto_gen_129( ; CHECK-NEXT: ret i1 true @@ -1495,6 +2931,16 @@ define i1 @auto_gen_129(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_129_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_129_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ugt double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_130(double %a, double %b) { ; CHECK-LABEL: @auto_gen_130( ; CHECK-NEXT: ret i1 true @@ -1505,6 +2951,16 @@ define i1 @auto_gen_130(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_130_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_130_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp uge double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_131(double %a, double %b) { ; CHECK-LABEL: @auto_gen_131( ; CHECK-NEXT: ret i1 true @@ -1515,6 +2971,16 @@ define i1 @auto_gen_131(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_131_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_131_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ult double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_132(double %a, double %b) { ; CHECK-LABEL: @auto_gen_132( ; CHECK-NEXT: ret i1 true @@ -1525,6 +2991,16 @@ define i1 @auto_gen_132(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_132_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_132_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp ule double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_133(double %a, double %b) 
{ ; CHECK-LABEL: @auto_gen_133( ; CHECK-NEXT: ret i1 true @@ -1535,6 +3011,16 @@ define i1 @auto_gen_133(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_133_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_133_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp une double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_134(double %a, double %b) { ; CHECK-LABEL: @auto_gen_134( ; CHECK-NEXT: ret i1 true @@ -1545,6 +3031,16 @@ define i1 @auto_gen_134(double %a, double %b) { ret i1 %retval } +define i1 @auto_gen_134_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_134_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp uno double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} + define i1 @auto_gen_135(double %a, double %b) { ; CHECK-LABEL: @auto_gen_135( ; CHECK-NEXT: ret i1 true @@ -1554,3 +3050,13 @@ define i1 @auto_gen_135(double %a, double %b) { %retval = or i1 %cmp, %cmp1 ret i1 %retval } + +define i1 @auto_gen_135_logical(double %a, double %b) { +; CHECK-LABEL: @auto_gen_135_logical( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp true double %a, %b + %cmp1 = fcmp true double %a, %b + %retval = select i1 %cmp, i1 true, i1 %cmp1 + ret i1 %retval +} diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index b5da1734c10200..ff624452656326 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -37,6 +37,18 @@ define i1 @test14(i32 %A, i32 %B) { ret i1 %D } +define i1 @test14_logical(i32 %A, i32 %B) { +; CHECK-LABEL: @test14_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %C1 = icmp ult i32 %A, %B + %C2 = icmp ugt i32 %A, %B + ; (A < B) | (A > B) === A != B + %D = select i1 %C1, i1 true, i1 %C2 + ret i1 %D +} + define i1 @test15(i32 %A, i32 %B) { ; 
CHECK-LABEL: @test15( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[A:%.*]], [[B:%.*]] @@ -49,6 +61,18 @@ define i1 @test15(i32 %A, i32 %B) { ret i1 %D } +define i1 @test15_logical(i32 %A, i32 %B) { +; CHECK-LABEL: @test15_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %C1 = icmp ult i32 %A, %B + %C2 = icmp eq i32 %A, %B + ; (A < B) | (A == B) === A <= B + %D = select i1 %C1, i1 true, i1 %C2 + ret i1 %D +} + define i32 @test16(i32 %A) { ; CHECK-LABEL: @test16( ; CHECK-NEXT: ret i32 [[A:%.*]] @@ -85,6 +109,18 @@ define i1 @test18(i32 %A) { ret i1 %D } +define i1 @test18_logical(i32 %A) { +; CHECK-LABEL: @test18_logical( +; CHECK-NEXT: [[A_OFF:%.*]] = add i32 [[A:%.*]], -50 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A_OFF]], 49 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %B = icmp sge i32 %A, 100 + %C = icmp slt i32 %A, 50 + %D = select i1 %B, i1 true, i1 %C + ret i1 %D +} + ; FIXME: Vectors should fold too. define <2 x i1> @test18vec(<2 x i32> %A) { ; CHECK-LABEL: @test18vec( @@ -172,6 +208,20 @@ define i1 @test25(i32 %A, i32 %B) { ret i1 %F } +define i1 @test25_logical(i32 %A, i32 %B) { +; CHECK-LABEL: @test25_logical( +; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[D:%.*]] = icmp ne i32 [[B:%.*]], 57 +; CHECK-NEXT: [[F:%.*]] = and i1 [[C]], [[D]] +; CHECK-NEXT: ret i1 [[F]] +; + %C = icmp eq i32 %A, 0 + %D = icmp eq i32 %B, 57 + %E = select i1 %C, i1 true, i1 %D + %F = xor i1 %E, -1 + ret i1 %F +} + ; PR5634 define i1 @test26(i32 %A, i32 %B) { ; CHECK-LABEL: @test26( @@ -186,6 +236,19 @@ define i1 @test26(i32 %A, i32 %B) { ret i1 %D } +define i1 @test26_logical(i32 %A, i32 %B) { +; CHECK-LABEL: @test26_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %C1 = icmp eq i32 %A, 0 + %C2 = icmp eq i32 %B, 0 + ; (A == 0) & (A == 0) --> (A|B) == 0 + %D = select i1 %C1, i1 %C2, i1 false + ret i1 
%D +} + define i1 @test27(i32* %A, i32* %B) { ; CHECK-LABEL: @test27( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32* [[A:%.*]], null @@ -228,6 +291,19 @@ define i1 @test28(i32 %A, i32 %B) { ret i1 %D } +define i1 @test28_logical(i32 %A, i32 %B) { +; CHECK-LABEL: @test28_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %C1 = icmp ne i32 %A, 0 + %C2 = icmp ne i32 %B, 0 + ; (A != 0) | (A != 0) --> (A|B) != 0 + %D = select i1 %C1, i1 true, i1 %C2 + ret i1 %D +} + define i1 @test29(i32* %A, i32* %B) { ; CHECK-LABEL: @test29( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32* [[A:%.*]], null @@ -342,6 +418,16 @@ define i1 @test33(i1 %X, i1 %Y) { ret i1 %b } +define i1 @test33_logical(i1 %X, i1 %Y) { +; CHECK-LABEL: @test33_logical( +; CHECK-NEXT: [[A:%.*]] = or i1 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[A]] +; + %a = select i1 %X, i1 true, i1 %Y + %b = select i1 %a, i1 true, i1 %X + ret i1 %b +} + define i32 @test34(i32 %X, i32 %Y) { ; CHECK-LABEL: @test34( ; CHECK-NEXT: [[A:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] @@ -377,6 +463,20 @@ define i1 @test36(i32 %x) { ret i1 %ret2 } +define i1 @test36_logical(i32 %x) { +; CHECK-LABEL: @test36_logical( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -23 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp1 = icmp eq i32 %x, 23 + %cmp2 = icmp eq i32 %x, 24 + %ret1 = select i1 %cmp1, i1 true, i1 %cmp2 + %cmp3 = icmp eq i32 %x, 25 + %ret2 = select i1 %ret1, i1 true, i1 %cmp3 + ret i1 %ret2 +} + define i1 @test37(i32 %x) { ; CHECK-LABEL: @test37( ; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X:%.*]], 7 @@ -390,6 +490,19 @@ define i1 @test37(i32 %x) { ret i1 %ret1 } +define i1 @test37_logical(i32 %x) { +; CHECK-LABEL: @test37_logical( +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X:%.*]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ADD1]], 31 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %add1 = add i32 %x, 7 + 
%cmp1 = icmp ult i32 %add1, 30 + %cmp2 = icmp eq i32 %x, 23 + %ret1 = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %ret1 +} + define <2 x i1> @test37_uniform(<2 x i32> %x) { ; CHECK-LABEL: @test37_uniform( ; CHECK-NEXT: [[ADD1:%.*]] = add <2 x i32> [[X:%.*]], @@ -435,6 +548,21 @@ define i1 @test38(i32 %x) { ret i1 %ret1 } +define i1 @test38_logical(i32 %x) { +; CHECK-LABEL: @test38_logical( +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X:%.*]], 7 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 23 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[ADD1]], 30 +; CHECK-NEXT: [[RET1:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[RET1]] +; + %add1 = add i32 %x, 7 + %cmp1 = icmp eq i32 %x, 23 + %cmp2 = icmp ult i32 %add1, 30 + %ret1 = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %ret1 +} + define <2 x i1> @test38_nonuniform(<2 x i32> %x) { ; CHECK-LABEL: @test38_nonuniform( ; CHECK-NEXT: [[ADD1:%.*]] = add <2 x i32> [[X:%.*]], @@ -649,6 +777,21 @@ define i1 @test46(i8 signext %c) { ret i1 %or } +define i1 @test46_logical(i8 signext %c) { +; CHECK-LABEL: @test46_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[C:%.*]], -33 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -65 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 26 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %c.off = add i8 %c, -97 + %cmp1 = icmp ult i8 %c.off, 26 + %c.off17 = add i8 %c, -65 + %cmp2 = icmp ult i8 %c.off17, 26 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + define <2 x i1> @test46_uniform(<2 x i8> %c) { ; CHECK-LABEL: @test46_uniform( ; CHECK-NEXT: [[C_OFF:%.*]] = add <2 x i8> [[C:%.*]], @@ -698,6 +841,21 @@ define i1 @test47(i8 signext %c) { ret i1 %or } +define i1 @test47_logical(i8 signext %c) { +; CHECK-LABEL: @test47_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[C:%.*]], -33 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -65 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 27 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %c.off = add i8 %c, -65 + %cmp1 = icmp ule i8 %c.off, 26 + %c.off17 = add 
i8 %c, -97 + %cmp2 = icmp ule i8 %c.off17, 26 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + define <2 x i1> @test47_nonuniform(<2 x i8> %c) { ; CHECK-LABEL: @test47_nonuniform( ; CHECK-NEXT: [[C_OFF:%.*]] = add <2 x i8> [[C:%.*]], @@ -829,6 +987,21 @@ define i1 @or_andn_cmp_1(i32 %a, i32 %b, i32 %c) { ret i1 %or } +define i1 @or_andn_cmp_1_logical(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @or_andn_cmp_1_logical( +; CHECK-NEXT: [[X:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[OR]] +; + %x = icmp sgt i32 %a, %b + %x_inv = icmp sle i32 %a, %b + %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering + %and = select i1 %y, i1 %x_inv, i1 false + %or = select i1 %x, i1 true, i1 %and + ret i1 %or +} + ; Commute the 'or': ; ((Y & ~X) | X) -> (X | Y), where 'not' is an inverted cmp @@ -865,6 +1038,21 @@ define i1 @or_andn_cmp_3(i72 %a, i72 %b, i72 %c) { ret i1 %or } +define i1 @or_andn_cmp_3_logical(i72 %a, i72 %b, i72 %c) { +; CHECK-LABEL: @or_andn_cmp_3_logical( +; CHECK-NEXT: [[X:%.*]] = icmp ugt i72 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i72 [[C:%.*]], 42 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[OR]] +; + %x = icmp ugt i72 %a, %b + %x_inv = icmp ule i72 %a, %b + %y = icmp ugt i72 %c, 42 ; thwart complexity-based ordering + %and = select i1 %x_inv, i1 %y, i1 false + %or = select i1 %x, i1 true, i1 %and + ret i1 %or +} + ; Commute the 'or': ; ((~X & Y) | X) -> (X | Y), where 'not' is an inverted cmp @@ -901,6 +1089,21 @@ define i1 @orn_and_cmp_1(i37 %a, i37 %b, i37 %c) { ret i1 %or } +define i1 @orn_and_cmp_1_logical(i37 %a, i37 %b, i37 %c) { +; CHECK-LABEL: @orn_and_cmp_1_logical( +; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i37 [[C:%.*]], 42 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[X_INV]], [[Y]] +; CHECK-NEXT: ret i1 [[OR]] +; + %x = 
icmp sgt i37 %a, %b + %x_inv = icmp sle i37 %a, %b + %y = icmp ugt i37 %c, 42 ; thwart complexity-based ordering + %and = select i1 %y, i1 %x, i1 false + %or = select i1 %x_inv, i1 true, i1 %and + ret i1 %or +} + ; Commute the 'or': ; ((Y & X) | ~X) -> (~X | Y), where 'not' is an inverted cmp @@ -919,6 +1122,21 @@ define i1 @orn_and_cmp_2(i16 %a, i16 %b, i16 %c) { ret i1 %or } +define i1 @orn_and_cmp_2_logical(i16 %a, i16 %b, i16 %c) { +; CHECK-LABEL: @orn_and_cmp_2_logical( +; CHECK-NEXT: [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i16 [[C:%.*]], 42 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[Y]], [[X_INV]] +; CHECK-NEXT: ret i1 [[OR]] +; + %x = icmp sge i16 %a, %b + %x_inv = icmp slt i16 %a, %b + %y = icmp ugt i16 %c, 42 ; thwart complexity-based ordering + %and = select i1 %y, i1 %x, i1 false + %or = select i1 %and, i1 true, i1 %x_inv + ret i1 %or +} + ; Commute the 'and': ; (~X | (X & Y)) -> (~X | Y), where 'not' is an inverted cmp @@ -955,6 +1173,21 @@ define i1 @orn_and_cmp_4(i32 %a, i32 %b, i32 %c) { ret i1 %or } +define i1 @orn_and_cmp_4_logical(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @orn_and_cmp_4_logical( +; CHECK-NEXT: [[X_INV:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[Y]], [[X_INV]] +; CHECK-NEXT: ret i1 [[OR]] +; + %x = icmp eq i32 %a, %b + %x_inv = icmp ne i32 %a, %b + %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering + %and = select i1 %x, i1 %y, i1 false + %or = select i1 %and, i1 true, i1 %x_inv + ret i1 %or +} + ; The constant vectors are inverses. Make sure we can turn this into a select without crashing trying to truncate the constant to 16xi1. 
define <16 x i1> @test51(<16 x i1> %arg, <16 x i1> %arg1) { ; CHECK-LABEL: @test51( @@ -1002,6 +1235,38 @@ end: ret i32 %conv8 } +define i32 @PR46712_logical(i1 %x, i1 %y, i1 %b, i64 %z) { +; CHECK-LABEL: @PR46712_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[B:%.*]], label [[TRUE:%.*]], label [[END:%.*]] +; CHECK: true: +; CHECK-NEXT: [[BOOL5_NOT:%.*]] = icmp eq i64 [[Z:%.*]], 0 +; CHECK-NEXT: [[SEL:%.*]] = zext i1 [[BOOL5_NOT]] to i32 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[T5:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SEL]], [[TRUE]] ] +; CHECK-NEXT: ret i32 [[T5]] +; +entry: + %t2 = select i1 %x, i1 true, i1 %y + %conv = sext i1 %t2 to i32 + %cmp = icmp sge i32 %conv, 1 + %conv2 = zext i1 %cmp to i64 + br i1 %b, label %true, label %end + +true: + %bool4 = icmp eq i64 %conv2, 0 + %bool5 = icmp ne i64 %z, 0 + %and = select i1 %bool4, i1 %bool5, i1 false + %sel = select i1 %and, i1 false, i1 true + br label %end + +end: + %t5 = phi i1 [ 0, %entry ], [ %sel, %true ] + %conv8 = zext i1 %t5 to i32 + ret i32 %conv8 +} + define i32 @test1(i32 %x, i32 %y) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] diff --git a/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll b/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll index ab37c7d56232e4..17d1dd28e1269c 100644 --- a/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll +++ b/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s ; ; This test makes sure that InstCombine does not replace the sequence of @@ -6,8 +7,12 @@ define zeroext i1 @test1(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: %xor = xor i32 %lhs, 5 -; CHECK-NEXT: %cmp1 = icmp eq i32 %xor, 10 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], 5 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 10 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 
[[XOR]], [[RHS:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[SEL]] +; %xor = xor i32 %lhs, 5 %cmp1 = icmp eq i32 %xor, 10 @@ -16,10 +21,30 @@ define zeroext i1 @test1(i32 %lhs, i32 %rhs) { ret i1 %sel } +define zeroext i1 @test1_logical(i32 %lhs, i32 %rhs) { +; CHECK-LABEL: @test1_logical( +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], 5 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 10 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[XOR]], [[RHS:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[SEL]] +; + + %xor = xor i32 %lhs, 5 + %cmp1 = icmp eq i32 %xor, 10 + %cmp2 = icmp eq i32 %xor, %rhs + %sel = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %sel +} + define zeroext i1 @test2(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: %xor = xor i32 %lhs, %rhs -; CHECK-NEXT: %cmp1 = icmp eq i32 %xor, 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 0 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[XOR]], 32 +; CHECK-NEXT: [[SEL:%.*]] = xor i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[SEL]] +; %xor = xor i32 %lhs, %rhs %cmp1 = icmp eq i32 %xor, 0 @@ -30,8 +55,12 @@ define zeroext i1 @test2(i32 %lhs, i32 %rhs) { define zeroext i1 @test3(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: %sub = sub nsw i32 %lhs, %rhs -; CHECK-NEXT: %cmp1 = icmp eq i32 %sub, 0 +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[SUB]], 0 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[SUB]], 31 +; CHECK-NEXT: [[SEL:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[SEL]] +; %sub = sub nsw i32 %lhs, %rhs %cmp1 = icmp eq i32 %sub, 0 @@ -39,3 +68,19 @@ define zeroext i1 @test3(i32 %lhs, i32 %rhs) { %sel = or i1 %cmp1, %cmp2 ret i1 %sel } + +define zeroext i1 @test3_logical(i32 %lhs, i32 %rhs) { +; CHECK-LABEL: @test3_logical( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[LHS:%.*]], 
[[RHS:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[SUB]], 0 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[SUB]], 31 +; CHECK-NEXT: [[SEL:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[SEL]] +; + + %sub = sub nsw i32 %lhs, %rhs + %cmp1 = icmp eq i32 %sub, 0 + %cmp2 = icmp eq i32 %sub, 31 + %sel = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %sel +} diff --git a/llvm/test/Transforms/InstCombine/range-check.ll b/llvm/test/Transforms/InstCombine/range-check.ll index ba77beae0f6864..5d56e0a90360c6 100644 --- a/llvm/test/Transforms/InstCombine/range-check.ll +++ b/llvm/test/Transforms/InstCombine/range-check.ll @@ -17,6 +17,19 @@ define i1 @test_and1(i32 %x, i32 %n) { ret i1 %c } +define i1 @test_and1_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @test_and1_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %nn = and i32 %n, 2147483647 + %a = icmp sge i32 %x, 0 + %b = icmp slt i32 %x, %nn + %c = select i1 %a, i1 %b, i1 false + ret i1 %c +} + define i1 @test_and2(i32 %x, i32 %n) { ; CHECK-LABEL: @test_and2( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -30,6 +43,19 @@ define i1 @test_and2(i32 %x, i32 %n) { ret i1 %c } +define i1 @test_and2_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @test_and2_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %nn = and i32 %n, 2147483647 + %a = icmp sgt i32 %x, -1 + %b = icmp sle i32 %x, %nn + %c = select i1 %a, i1 %b, i1 false + ret i1 %c +} + define i1 @test_and3(i32 %x, i32 %n) { ; CHECK-LABEL: @test_and3( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -43,6 +69,19 @@ define i1 @test_and3(i32 %x, i32 %n) { ret i1 %c } +define i1 @test_and3_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @test_and3_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: 
[[TMP1:%.*]] = icmp ugt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %nn = and i32 %n, 2147483647 + %a = icmp sgt i32 %nn, %x + %b = icmp sge i32 %x, 0 + %c = select i1 %a, i1 %b, i1 false + ret i1 %c +} + define i1 @test_and4(i32 %x, i32 %n) { ; CHECK-LABEL: @test_and4( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -56,6 +95,19 @@ define i1 @test_and4(i32 %x, i32 %n) { ret i1 %c } +define i1 @test_and4_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @test_and4_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %nn = and i32 %n, 2147483647 + %a = icmp sge i32 %nn, %x + %b = icmp sge i32 %x, 0 + %c = select i1 %a, i1 %b, i1 false + ret i1 %c +} + define i1 @test_or1(i32 %x, i32 %n) { ; CHECK-LABEL: @test_or1( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -69,6 +121,19 @@ define i1 @test_or1(i32 %x, i32 %n) { ret i1 %c } +define i1 @test_or1_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @test_or1_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %nn = and i32 %n, 2147483647 + %a = icmp slt i32 %x, 0 + %b = icmp sge i32 %x, %nn + %c = select i1 %a, i1 true, i1 %b + ret i1 %c +} + define i1 @test_or2(i32 %x, i32 %n) { ; CHECK-LABEL: @test_or2( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -82,6 +147,19 @@ define i1 @test_or2(i32 %x, i32 %n) { ret i1 %c } +define i1 @test_or2_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @test_or2_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %nn = and i32 %n, 2147483647 + %a = icmp sle i32 %x, -1 + %b = icmp sgt i32 %x, %nn + %c = select i1 %a, i1 true, i1 %b + ret i1 %c +} + define i1 @test_or3(i32 %x, i32 %n) { ; CHECK-LABEL: @test_or3( ; CHECK-NEXT: 
[[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -95,6 +173,19 @@ define i1 @test_or3(i32 %x, i32 %n) { ret i1 %c } +define i1 @test_or3_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @test_or3_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %nn = and i32 %n, 2147483647 + %a = icmp sle i32 %nn, %x + %b = icmp slt i32 %x, 0 + %c = select i1 %a, i1 true, i1 %b + ret i1 %c +} + define i1 @test_or4(i32 %x, i32 %n) { ; CHECK-LABEL: @test_or4( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -108,6 +199,19 @@ define i1 @test_or4(i32 %x, i32 %n) { ret i1 %c } +define i1 @test_or4_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @test_or4_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %nn = and i32 %n, 2147483647 + %a = icmp slt i32 %nn, %x + %b = icmp slt i32 %x, 0 + %c = select i1 %a, i1 true, i1 %b + ret i1 %c +} + ; Negative tests define i1 @negative1(i32 %x, i32 %n) { @@ -125,6 +229,21 @@ define i1 @negative1(i32 %x, i32 %n) { ret i1 %c } +define i1 @negative1_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @negative1_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[A:%.*]] = icmp sgt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], 0 +; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %nn = and i32 %n, 2147483647 + %a = icmp slt i32 %x, %nn + %b = icmp sgt i32 %x, 0 ; should be: icmp sge + %c = select i1 %a, i1 %b, i1 false + ret i1 %c +} + define i1 @negative2(i32 %x, i32 %n) { ; CHECK-LABEL: @negative2( ; CHECK-NEXT: [[A:%.*]] = icmp slt i32 [[X:%.*]], [[N:%.*]] @@ -138,6 +257,19 @@ define i1 @negative2(i32 %x, i32 %n) { ret i1 %c } +define i1 @negative2_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @negative2_logical( +; CHECK-NEXT: [[A:%.*]] = icmp slt i32 [[X:%.*]], 
[[N:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], -1 +; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %a = icmp slt i32 %x, %n ; n can be negative + %b = icmp sge i32 %x, 0 + %c = select i1 %a, i1 %b, i1 false + ret i1 %c +} + define i1 @negative3(i32 %x, i32 %y, i32 %n) { ; CHECK-LABEL: @negative3( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -153,6 +285,21 @@ define i1 @negative3(i32 %x, i32 %y, i32 %n) { ret i1 %c } +define i1 @negative3_logical(i32 %x, i32 %y, i32 %n) { +; CHECK-LABEL: @negative3_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[A:%.*]] = icmp sgt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[Y:%.*]], -1 +; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %nn = and i32 %n, 2147483647 + %a = icmp slt i32 %x, %nn + %b = icmp sge i32 %y, 0 ; should compare %x and not %y + %c = select i1 %a, i1 %b, i1 false + ret i1 %c +} + define i1 @negative4(i32 %x, i32 %n) { ; CHECK-LABEL: @negative4( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -168,6 +315,21 @@ define i1 @negative4(i32 %x, i32 %n) { ret i1 %c } +define i1 @negative4_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @negative4_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 +; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], -1 +; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %nn = and i32 %n, 2147483647 + %a = icmp ne i32 %x, %nn ; should be: icmp slt/sle + %b = icmp sge i32 %x, 0 + %c = select i1 %a, i1 %b, i1 false + ret i1 %c +} + define i1 @negative5(i32 %x, i32 %n) { ; CHECK-LABEL: @negative5( ; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 2147483647 @@ -183,3 +345,18 @@ define i1 @negative5(i32 %x, i32 %n) { ret i1 %c } +define i1 @negative5_logical(i32 %x, i32 %n) { +; CHECK-LABEL: @negative5_logical( +; CHECK-NEXT: [[NN:%.*]] = and i32 [[N:%.*]], 
2147483647 +; CHECK-NEXT: [[A:%.*]] = icmp sgt i32 [[NN]], [[X:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], -1 +; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %nn = and i32 %n, 2147483647 + %a = icmp slt i32 %x, %nn + %b = icmp sge i32 %x, 0 + %c = select i1 %a, i1 true, i1 %b ; should be: and + ret i1 %c +} + diff --git a/llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll b/llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll index 2b4686004abc2d..bcc62dc983c689 100644 --- a/llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll +++ b/llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll @@ -24,6 +24,23 @@ define i1 @t0_bad(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t0_bad_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t0_bad_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ult i8 [[ADJUSTED]], [[BASE]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ult i8 %adjusted, %base + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + ; Ok, base is non-zero. 
define i1 @t1(i8 %base, i8 %offset) { ; CHECK-LABEL: @t1( @@ -46,6 +63,27 @@ define i1 @t1(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t1_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t1_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ult i8 %adjusted, %base + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + ; Ok, offset is non-zero. define i1 @t2(i8 %base, i8 %offset) { ; CHECK-LABEL: @t2( @@ -68,6 +106,27 @@ define i1 @t2(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t2_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t2_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[OFFSET:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[OFFSET]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %offset, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ult i8 %adjusted, %base + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + ; We need to produce extra instruction, so one of icmp's must go away. 
define i1 @t3_oneuse0(i8 %base, i8 %offset) { ; CHECK-LABEL: @t3_oneuse0( @@ -92,6 +151,30 @@ define i1 @t3_oneuse0(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t3_oneuse0_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t3_oneuse0_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %no_underflow = icmp ult i8 %adjusted, %base + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 @t4_oneuse1(i8 %base, i8 %offset) { ; CHECK-LABEL: @t4_oneuse1( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 @@ -115,6 +198,30 @@ define i1 @t4_oneuse1(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t4_oneuse1_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t4_oneuse1_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ult i8 [[ADJUSTED]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 
%base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ult i8 %adjusted, %base + call void @use1(i1 %no_underflow) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 @t5_oneuse2_bad(i8 %base, i8 %offset) { ; CHECK-LABEL: @t5_oneuse2_bad( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 @@ -141,6 +248,32 @@ define i1 @t5_oneuse2_bad(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t5_oneuse2_bad_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t5_oneuse2_bad_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ult i8 [[ADJUSTED]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %no_underflow = icmp ult i8 %adjusted, %base + call void @use1(i1 %no_underflow) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + define i1 @t6_commutativity0(i8 %base, i8 %offset) { ; CHECK-LABEL: @t6_commutativity0( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 @@ -161,6 +294,27 @@ define i1 @t6_commutativity0(i8 %base, i8 %offset) { %r = and i1 %no_underflow, %not_null ; swapped ret i1 %r } + +define i1 @t6_commutativity0_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t6_commutativity0_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add 
i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ult i8 %adjusted, %base + %r = select i1 %no_underflow, i1 %not_null, i1 false ; swapped + ret i1 %r +} define i1 @t7_commutativity1(i8 %base, i8 %offset) { ; CHECK-LABEL: @t7_commutativity1( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 @@ -181,6 +335,27 @@ define i1 @t7_commutativity1(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t7_commutativity1_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t7_commutativity1_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ugt i8 %base, %adjusted ; swapped + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 @t7_commutativity3(i8 %base, i8 %offset) { ; CHECK-LABEL: @t7_commutativity3( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 @@ -202,6 +377,27 @@ define i1 @t7_commutativity3(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t7_commutativity3_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t7_commutativity3_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 
[[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ugt i8 %base, %adjusted ; swapped + %r = select i1 %no_underflow, i1 %not_null, i1 false ; swapped + ret i1 %r +} + ; We could have the opposite question, did we get null or overflow happened? define i1 @t8(i8 %base, i8 %offset) { ; CHECK-LABEL: @t8( @@ -224,6 +420,27 @@ define i1 @t8(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t8_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t8_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp eq i8 %adjusted, 0 + %no_underflow = icmp uge i8 %adjusted, %base + %r = select i1 %not_null, i1 true, i1 %no_underflow + ret i1 %r +} + ; The comparison can be with any of the values being added. 
define i1 @t9(i8 %base, i8 %offset) { ; CHECK-LABEL: @t9( @@ -245,3 +462,24 @@ define i1 @t9(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t9_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t9_logical( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[BASE:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %cmp = icmp slt i8 %base, 0 + call void @llvm.assume(i1 %cmp) + + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ult i8 %adjusted, %offset + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} diff --git a/llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll b/llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll index 7b29bcdd253154..e2f256140b5195 100644 --- a/llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll +++ b/llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll @@ -22,6 +22,22 @@ define i1 @t0(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t0_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t0_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[OFFSET]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ule i8 %adjusted, %base + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r 
+} + ; We need to produce extra instruction, so one of icmp's must go away. define i1 @t1_oneuse0(i8 %base, i8 %offset) { ; CHECK-LABEL: @t1_oneuse0( @@ -41,6 +57,25 @@ define i1 @t1_oneuse0(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t1_oneuse0_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t1_oneuse0_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[OFFSET]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %no_underflow = icmp ule i8 %adjusted, %base + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 @t2_oneuse1(i8 %base, i8 %offset) { ; CHECK-LABEL: @t2_oneuse1( ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -59,6 +94,25 @@ define i1 @t2_oneuse1(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t2_oneuse1_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t2_oneuse1_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[ADJUSTED]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[OFFSET]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ule i8 %adjusted, %base + call void @use1(i1 %no_underflow) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 
@n3_oneuse2_bad(i8 %base, i8 %offset) { ; CHECK-LABEL: @n3_oneuse2_bad( ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -80,6 +134,27 @@ define i1 @n3_oneuse2_bad(i8 %base, i8 %offset) { ret i1 %r } +define i1 @n3_oneuse2_bad_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @n3_oneuse2_bad_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[ADJUSTED]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %no_underflow = icmp ule i8 %adjusted, %base + call void @use1(i1 %no_underflow) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + define i1 @t4_commutativity0(i8 %base, i8 %offset) { ; CHECK-LABEL: @t4_commutativity0( ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -95,6 +170,22 @@ define i1 @t4_commutativity0(i8 %base, i8 %offset) { %r = and i1 %no_underflow, %not_null ; swapped ret i1 %r } + +define i1 @t4_commutativity0_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t4_commutativity0_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[OFFSET]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ule i8 %adjusted, %base + %r = select i1 %no_underflow, i1 %not_null, i1 false ; swapped + ret i1 %r +} define i1 @t5_commutativity1(i8 %base, i8 
%offset) { ; CHECK-LABEL: @t5_commutativity1( ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -110,6 +201,22 @@ define i1 @t5_commutativity1(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t5_commutativity1_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t5_commutativity1_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[OFFSET]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp uge i8 %base, %adjusted ; swapped + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 @t6_commutativity3(i8 %base, i8 %offset) { ; CHECK-LABEL: @t6_commutativity3( ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -126,6 +233,22 @@ define i1 @t6_commutativity3(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t6_commutativity3_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t6_commutativity3_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[OFFSET]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp uge i8 %base, %adjusted ; swapped + %r = select i1 %no_underflow, i1 %not_null, i1 false ; swapped + ret i1 %r +} + ; We could have the opposite question, did we get null or overflow happened? 
define i1 @t7(i8 %base, i8 %offset) { ; CHECK-LABEL: @t7( @@ -143,6 +266,22 @@ define i1 @t7(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t7_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t7_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[ADJUSTED]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[TMP1]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp eq i8 %adjusted, 0 + %no_underflow = icmp ugt i8 %adjusted, %base + %r = select i1 %not_null, i1 true, i1 %no_underflow + ret i1 %r +} + ; The comparison can be with any of the values being added. define i1 @t8(i8 %base, i8 %offset) { ; CHECK-LABEL: @t8( @@ -159,3 +298,19 @@ define i1 @t8(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t8_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t8_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[BASE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %adjusted = add i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ule i8 %adjusted, %offset + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} diff --git a/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll b/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll index ae70b9259f0b96..b875396a8c6dde 100644 --- a/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll +++ b/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll @@ -37,6 +37,27 @@ define i1 @t0_noncanonical_ignoreme(i8 %base, i8 %offset) { ret i1 %r } +define i1 
@t0_noncanonical_ignoreme_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t0_noncanonical_ignoreme_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %no_underflow = icmp ule i8 %adjusted, %base + call void @use1(i1 %no_underflow) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + define i1 @t1(i8 %base, i8 %offset) { ; CHECK-LABEL: @t1( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -57,6 +78,27 @@ define i1 @t1(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t1_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t1_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %no_underflow = icmp uge i8 %base, %offset + call void @use1(i1 %no_underflow) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 @t1_strict(i8 %base, i8 %offset) { ; CHECK-LABEL: @t1_strict( ; CHECK-NEXT: [[ADJUSTED:%.*]] 
= sub i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -77,6 +119,26 @@ define i1 @t1_strict(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t1_strict_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t1_strict_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: ret i1 [[NO_UNDERFLOW]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %no_underflow = icmp ugt i8 %base, %offset ; same is valid for strict predicate + call void @use1(i1 %no_underflow) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + define i1 @t2(i8 %base, i8 %offset) { ; CHECK-LABEL: @t2( ; CHECK-NEXT: [[AGG:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[BASE:%.*]], i8 [[OFFSET:%.*]]) @@ -104,6 +166,33 @@ define i1 @t2(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t2_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t2_logical( +; CHECK-NEXT: [[AGG:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[BASE:%.*]], i8 [[OFFSET:%.*]]) +; CHECK-NEXT: call void @useagg({ i8, i1 } [[AGG]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = extractvalue { i8, i1 } [[AGG]], 0 +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[UNDERFLOW:%.*]] = extractvalue { i8, i1 } [[AGG]], 1 +; CHECK-NEXT: call void @use1(i1 [[UNDERFLOW]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = xor i1 [[UNDERFLOW]], true +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] +; + %agg = call {i8, i1} @llvm.usub.with.overflow(i8 %base, i8 %offset) + call void 
@useagg({i8, i1} %agg) + %adjusted = extractvalue {i8, i1} %agg, 0 + call void @use8(i8 %adjusted) + %underflow = extractvalue {i8, i1} %agg, 1 + call void @use1(i1 %underflow) + %no_underflow = xor i1 %underflow, -1 + call void @use1(i1 %no_underflow) + %not_null = icmp ne i8 %adjusted, 0 + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + ; Commutativity define i1 @t3_commutability0(i8 %base, i8 %offset) { @@ -126,6 +215,27 @@ define i1 @t3_commutability0(i8 %base, i8 %offset) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t3_commutability0_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t3_commutability0_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %no_underflow = icmp ule i8 %offset, %base ; swapped + call void @use1(i1 %no_underflow) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 @t4_commutability1(i8 %base, i8 %offset) { ; CHECK-LABEL: @t4_commutability1( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -146,6 +256,27 @@ define i1 @t4_commutability1(i8 %base, i8 %offset) { %r = and i1 %no_underflow, %not_null ; swapped ret i1 %r } + +define i1 @t4_commutability1_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t4_commutability1_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; 
CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %no_underflow = icmp uge i8 %base, %offset + call void @use1(i1 %no_underflow) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %no_underflow, i1 %not_null, i1 false ; swapped + ret i1 %r +} define i1 @t5_commutability2(i8 %base, i8 %offset) { ; CHECK-LABEL: @t5_commutability2( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -167,6 +298,27 @@ define i1 @t5_commutability2(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t5_commutability2_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t5_commutability2_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %no_underflow = icmp ule i8 %offset, %base ; swapped + call void @use1(i1 %no_underflow) + %not_null = icmp ne i8 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %no_underflow, i1 %not_null, i1 false ; swapped + ret i1 %r +} + define i1 @t6_commutability(i8 %base, i8 %offset) { ; CHECK-LABEL: @t6_commutability( ; CHECK-NEXT: [[AGG:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[BASE:%.*]], i8 [[OFFSET:%.*]]) @@ -194,6 +346,33 @@ define i1 @t6_commutability(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t6_commutability_logical(i8 %base, i8 %offset) 
{ +; CHECK-LABEL: @t6_commutability_logical( +; CHECK-NEXT: [[AGG:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[BASE:%.*]], i8 [[OFFSET:%.*]]) +; CHECK-NEXT: call void @useagg({ i8, i1 } [[AGG]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = extractvalue { i8, i1 } [[AGG]], 0 +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[UNDERFLOW:%.*]] = extractvalue { i8, i1 } [[AGG]], 1 +; CHECK-NEXT: call void @use1(i1 [[UNDERFLOW]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = xor i1 [[UNDERFLOW]], true +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] +; + %agg = call {i8, i1} @llvm.usub.with.overflow(i8 %base, i8 %offset) + call void @useagg({i8, i1} %agg) + %adjusted = extractvalue {i8, i1} %agg, 0 + call void @use8(i8 %adjusted) + %underflow = extractvalue {i8, i1} %agg, 1 + call void @use1(i1 %underflow) + %no_underflow = xor i1 %underflow, -1 + call void @use1(i1 %no_underflow) + %not_null = icmp ne i8 %adjusted, 0 + %r = select i1 %no_underflow, i1 %not_null, i1 false ; swapped + ret i1 %r +} + ; What if we were checking the opposite question, that we either got null, ; or overflow happened? 
@@ -217,6 +396,27 @@ define i1 @t7(i8 %base, i8 %offset) { %r = or i1 %null, %underflow ret i1 %r } + +define i1 @t7_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t7_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[UNDERFLOW:%.*]] = icmp ult i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: call void @use1(i1 [[UNDERFLOW]]) +; CHECK-NEXT: [[NULL:%.*]] = icmp eq i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %underflow = icmp ult i8 %base, %offset + call void @use1(i1 %underflow) + %null = icmp eq i8 %adjusted, 0 + call void @use1(i1 %null) + %r = select i1 %null, i1 true, i1 %underflow + ret i1 %r +} define i1 @t7_nonstrict(i8 %base, i8 %offset) { ; CHECK-LABEL: @t7_nonstrict( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -237,6 +437,26 @@ define i1 @t7_nonstrict(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t7_nonstrict_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t7_nonstrict_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[UNDERFLOW:%.*]] = icmp ule i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: call void @use1(i1 [[UNDERFLOW]]) +; CHECK-NEXT: [[NULL:%.*]] = icmp eq i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NULL]]) +; CHECK-NEXT: ret i1 [[UNDERFLOW]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %underflow = icmp ule i8 %base, %offset ; same is valid for non-strict predicate + call void @use1(i1 %underflow) + %null = icmp eq i8 %adjusted, 0 + call void @use1(i1 %null) + %r = select i1 %null, i1 true, i1 %underflow + ret i1 %r +} + define i1 @t8(i8 %base, i8 %offset) { ; CHECK-LABEL: @t8( ; CHECK-NEXT: [[AGG:%.*]] = call { i8, i1 } 
@llvm.usub.with.overflow.i8(i8 [[BASE:%.*]], i8 [[OFFSET:%.*]]) @@ -260,6 +480,29 @@ define i1 @t8(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t8_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t8_logical( +; CHECK-NEXT: [[AGG:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[BASE:%.*]], i8 [[OFFSET:%.*]]) +; CHECK-NEXT: call void @useagg({ i8, i1 } [[AGG]]) +; CHECK-NEXT: [[ADJUSTED:%.*]] = extractvalue { i8, i1 } [[AGG]], 0 +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[UNDERFLOW:%.*]] = extractvalue { i8, i1 } [[AGG]], 1 +; CHECK-NEXT: call void @use1(i1 [[UNDERFLOW]]) +; CHECK-NEXT: [[NULL:%.*]] = icmp eq i8 [[ADJUSTED]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[NULL]], [[UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] +; + %agg = call {i8, i1} @llvm.usub.with.overflow(i8 %base, i8 %offset) + call void @useagg({i8, i1} %agg) + %adjusted = extractvalue {i8, i1} %agg, 0 + call void @use8(i8 %adjusted) + %underflow = extractvalue {i8, i1} %agg, 1 + call void @use1(i1 %underflow) + %null = icmp eq i8 %adjusted, 0 + %r = select i1 %null, i1 true, i1 %underflow + ret i1 %r +} + ; And these patterns also have commutative variants define i1 @t9_commutative(i8 %base, i8 %offset) { @@ -283,6 +526,27 @@ define i1 @t9_commutative(i8 %base, i8 %offset) { ret i1 %r } +define i1 @t9_commutative_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @t9_commutative_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[UNDERFLOW:%.*]] = icmp ult i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: call void @use1(i1 [[UNDERFLOW]]) +; CHECK-NEXT: [[NULL:%.*]] = icmp eq i8 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %underflow = icmp ult i8 %base, %adjusted ; swapped + call void @use1(i1 %underflow) + %null = icmp 
eq i8 %adjusted, 0 + call void @use1(i1 %null) + %r = select i1 %null, i1 true, i1 %underflow + ret i1 %r +} + ;------------------------------------------------------------------------------- define i1 @t10(i64 %base, i64* nonnull %offsetptr) { @@ -308,6 +572,30 @@ define i1 @t10(i64 %base, i64* nonnull %offsetptr) { %r = and i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t10_logical(i64 %base, i64* nonnull %offsetptr) { +; CHECK-LABEL: @t10_logical( +; CHECK-NEXT: [[OFFSET:%.*]] = ptrtoint i64* [[OFFSETPTR:%.*]] to i64 +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i64 [[BASE:%.*]], [[OFFSET]] +; CHECK-NEXT: call void @use64(i64 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i64 [[OFFSET]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i64 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[OFFSET]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %offset = ptrtoint i64* %offsetptr to i64 + + %adjusted = sub i64 %base, %offset + call void @use64(i64 %adjusted) + %no_underflow = icmp ult i64 %adjusted, %base + call void @use1(i1 %no_underflow) + %not_null = icmp ne i64 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} define i1 @t11_commutative(i64 %base, i64* nonnull %offsetptr) { ; CHECK-LABEL: @t11_commutative( ; CHECK-NEXT: [[OFFSET:%.*]] = ptrtoint i64* [[OFFSETPTR:%.*]] to i64 @@ -332,6 +620,30 @@ define i1 @t11_commutative(i64 %base, i64* nonnull %offsetptr) { ret i1 %r } +define i1 @t11_commutative_logical(i64 %base, i64* nonnull %offsetptr) { +; CHECK-LABEL: @t11_commutative_logical( +; CHECK-NEXT: [[OFFSET:%.*]] = ptrtoint i64* [[OFFSETPTR:%.*]] to i64 +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i64 [[BASE:%.*]], [[OFFSET]] +; CHECK-NEXT: call void @use64(i64 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i64 [[OFFSET]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 
[[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i64 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[OFFSET]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %offset = ptrtoint i64* %offsetptr to i64 + + %adjusted = sub i64 %base, %offset + call void @use64(i64 %adjusted) + %no_underflow = icmp ugt i64 %base, %adjusted ; swapped + call void @use1(i1 %no_underflow) + %not_null = icmp ne i64 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + define i1 @t12(i64 %base, i64* nonnull %offsetptr) { ; CHECK-LABEL: @t12( ; CHECK-NEXT: [[OFFSET:%.*]] = ptrtoint i64* [[OFFSETPTR:%.*]] to i64 @@ -355,6 +667,30 @@ define i1 @t12(i64 %base, i64* nonnull %offsetptr) { %r = or i1 %not_null, %no_underflow ret i1 %r } + +define i1 @t12_logical(i64 %base, i64* nonnull %offsetptr) { +; CHECK-LABEL: @t12_logical( +; CHECK-NEXT: [[OFFSET:%.*]] = ptrtoint i64* [[OFFSETPTR:%.*]] to i64 +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i64 [[BASE:%.*]], [[OFFSET]] +; CHECK-NEXT: call void @use64(i64 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ugt i64 [[OFFSET]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp eq i64 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i64 [[OFFSET]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %offset = ptrtoint i64* %offsetptr to i64 + + %adjusted = sub i64 %base, %offset + call void @use64(i64 %adjusted) + %no_underflow = icmp uge i64 %adjusted, %base + call void @use1(i1 %no_underflow) + %not_null = icmp eq i64 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %not_null, i1 true, i1 %no_underflow + ret i1 %r +} define i1 @t13(i64 %base, i64* nonnull %offsetptr) { ; CHECK-LABEL: @t13( ; CHECK-NEXT: [[OFFSET:%.*]] = ptrtoint i64* [[OFFSETPTR:%.*]] to i64 @@ -379,6 +715,30 @@ define i1 @t13(i64 %base, i64* 
nonnull %offsetptr) { ret i1 %r } +define i1 @t13_logical(i64 %base, i64* nonnull %offsetptr) { +; CHECK-LABEL: @t13_logical( +; CHECK-NEXT: [[OFFSET:%.*]] = ptrtoint i64* [[OFFSETPTR:%.*]] to i64 +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i64 [[BASE:%.*]], [[OFFSET]] +; CHECK-NEXT: call void @use64(i64 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ugt i64 [[OFFSET]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp eq i64 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i64 [[OFFSET]], [[BASE]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %offset = ptrtoint i64* %offsetptr to i64 + + %adjusted = sub i64 %base, %offset + call void @use64(i64 %adjusted) + %no_underflow = icmp ule i64 %base, %adjusted ; swapped + call void @use1(i1 %no_underflow) + %not_null = icmp eq i64 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select i1 %not_null, i1 true, i1 %no_underflow + ret i1 %r +} + define i1 @t14_bad(i64 %base, i64 %offset) { ; CHECK-LABEL: @t14_bad( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i64 [[BASE:%.*]], [[OFFSET:%.*]] @@ -400,6 +760,27 @@ define i1 @t14_bad(i64 %base, i64 %offset) { ret i1 %r } +define i1 @t14_bad_logical(i64 %base, i64 %offset) { +; CHECK-LABEL: @t14_bad_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i64 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use64(i64 [[ADJUSTED]]) +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ult i64 [[ADJUSTED]], [[BASE]] +; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i64 [[ADJUSTED]], 0 +; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] +; + %adjusted = sub i64 %base, %offset + call void @use64(i64 %adjusted) + %no_underflow = icmp ult i64 %adjusted, %base + call void @use1(i1 %no_underflow) + %not_null = icmp ne i64 %adjusted, 0 + call void @use1(i1 %not_null) + %r = select 
i1 %not_null, i1 %no_underflow, i1 false + ret i1 %r +} + define i1 @base_ult_offset(i8 %base, i8 %offset) { ; CHECK-LABEL: @base_ult_offset( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -414,6 +795,21 @@ define i1 @base_ult_offset(i8 %base, i8 %offset) { %r = and i1 %no_underflow, %not_null ret i1 %r } + +define i1 @base_ult_offset_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @base_ult_offset_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp ne i8 %adjusted, 0 + %no_underflow = icmp ule i8 %base, %offset + %r = select i1 %no_underflow, i1 %not_null, i1 false + ret i1 %r +} define i1 @base_uge_offset(i8 %base, i8 %offset) { ; CHECK-LABEL: @base_uge_offset( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] @@ -428,3 +824,18 @@ define i1 @base_uge_offset(i8 %base, i8 %offset) { %r = or i1 %no_underflow, %not_null ret i1 %r } + +define i1 @base_uge_offset_logical(i8 %base, i8 %offset) { +; CHECK-LABEL: @base_uge_offset_logical( +; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %adjusted = sub i8 %base, %offset + call void @use8(i8 %adjusted) + %not_null = icmp eq i8 %adjusted, 0 + %no_underflow = icmp ugt i8 %base, %offset + %r = select i1 %no_underflow, i1 true, i1 %not_null + ret i1 %r +} diff --git a/llvm/test/Transforms/InstCombine/select-crash-noverify.ll b/llvm/test/Transforms/InstCombine/select-crash-noverify.ll index 4a366aa8fb834c..aa018db5076c41 100644 --- a/llvm/test/Transforms/InstCombine/select-crash-noverify.ll +++ b/llvm/test/Transforms/InstCombine/select-crash-noverify.ll @@ -17,3 
+17,19 @@ xpto: return: ret i32 7 } + +define i32 @test3_logical(i1 %bool, i32 %a) { +entry: + %cond = select i1 %bool, i1 true, i1 true + br i1 %cond, label %return, label %xpto + +; technically reachable, but this malformed IR may appear as a result of constant propagation +xpto: + %select = select i1 %bool, i32 %a, i32 %select + %select2 = select i1 %bool, i32 %select2, i32 %a + %sum = add i32 %select, %select2 + ret i32 %sum + +return: + ret i32 7 +} diff --git a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll index c534acd70cdb62..64b16905fb9732 100644 --- a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll +++ b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll @@ -16,7 +16,7 @@ declare void @use2(i1) define i32 @select_clz_to_ctz(i32 %a) { ; CHECK-LABEL: @select_clz_to_ctz( -; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), !range !0 +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), [[RNG0:!range !.*]] ; CHECK-NEXT: ret i32 [[COND]] ; %sub = sub i32 0, %a @@ -30,7 +30,7 @@ define i32 @select_clz_to_ctz(i32 %a) { define i32 @select_clz_to_ctz_preserve_flag(i32 %a) { ; CHECK-LABEL: @select_clz_to_ctz_preserve_flag( -; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 false), !range !0 +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 false), [[RNG0]] ; CHECK-NEXT: ret i32 [[COND]] ; %sub = sub i32 0, %a @@ -60,10 +60,10 @@ define i32 @select_clz_to_ctz_extra_use(i32 %a) { ; CHECK-LABEL: @select_clz_to_ctz_extra_use( ; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]] -; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), !range !0 +; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), [[RNG0]] ; CHECK-NEXT: [[SUB1:%.*]] = xor i32 [[LZ]], 31 ; CHECK-NEXT: call void @use(i32 [[SUB1]]) -; 
CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range !0 +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[COND]] ; %sub = sub i32 0, %a @@ -78,7 +78,7 @@ define i32 @select_clz_to_ctz_extra_use(i32 %a) { define i32 @select_clz_to_ctz_and_commuted(i32 %a) { ; CHECK-LABEL: @select_clz_to_ctz_and_commuted( -; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), !range !0 +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[COND]] ; %sub = sub i32 0, %a @@ -94,7 +94,7 @@ define i32 @select_clz_to_ctz_icmp_ne(i32 %a) { ; CHECK-LABEL: @select_clz_to_ctz_icmp_ne( ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0 ; CHECK-NEXT: call void @use2(i1 [[TOBOOL]]) -; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range !0 +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[COND]] ; %sub = sub i32 0, %a @@ -109,7 +109,7 @@ define i32 @select_clz_to_ctz_icmp_ne(i32 %a) { define i64 @select_clz_to_ctz_i64(i64 %a) { ; CHECK-LABEL: @select_clz_to_ctz_i64( -; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.cttz.i64(i64 [[A:%.*]], i1 true), !range !1 +; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.cttz.i64(i64 [[A:%.*]], i1 true), [[RNG1:!range !.*]] ; CHECK-NEXT: ret i64 [[COND]] ; %sub = sub i64 0, %a @@ -127,7 +127,7 @@ define i32 @select_clz_to_ctz_wrong_sub(i32 %a) { ; CHECK-LABEL: @select_clz_to_ctz_wrong_sub( ; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, [[A:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]] -; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), !range !0 +; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), [[RNG0]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 ; CHECK-NEXT: [[SUB1:%.*]] = xor i32 [[LZ]], 31 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 
[[LZ]], i32 [[SUB1]] @@ -146,7 +146,7 @@ define i64 @select_clz_to_ctz_i64_wrong_xor(i64 %a) { ; CHECK-LABEL: @select_clz_to_ctz_i64_wrong_xor( ; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[A:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[SUB]], [[A]] -; CHECK-NEXT: [[LZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[AND]], i1 true), !range !1 +; CHECK-NEXT: [[LZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[AND]], i1 true), [[RNG1]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A]], 0 ; CHECK-NEXT: [[SUB11:%.*]] = or i64 [[LZ]], 64 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 [[LZ]], i64 [[SUB11]] @@ -165,7 +165,7 @@ define i64 @select_clz_to_ctz_i64_wrong_icmp_cst(i64 %a) { ; CHECK-LABEL: @select_clz_to_ctz_i64_wrong_icmp_cst( ; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[A:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[SUB]], [[A]] -; CHECK-NEXT: [[LZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[AND]], i1 true), !range !1 +; CHECK-NEXT: [[LZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[AND]], i1 true), [[RNG1]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A]], 1 ; CHECK-NEXT: [[SUB1:%.*]] = xor i64 [[LZ]], 63 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 [[LZ]], i64 [[SUB1]] @@ -184,7 +184,7 @@ define i64 @select_clz_to_ctz_i64_wrong_icmp_pred(i64 %a) { ; CHECK-LABEL: @select_clz_to_ctz_i64_wrong_icmp_pred( ; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[A:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[SUB]], [[A]] -; CHECK-NEXT: [[LZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[AND]], i1 true), !range !1 +; CHECK-NEXT: [[LZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[AND]], i1 true), [[RNG1]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp slt i64 [[A]], 0 ; CHECK-NEXT: [[SUB1:%.*]] = xor i64 [[LZ]], 63 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 [[LZ]], i64 [[SUB1]] @@ -220,11 +220,11 @@ define <2 x i32> @select_clz_to_ctz_vec_with_undef(<2 x i32> %a) { define i4 @PR45762(i3 %x4) { ; CHECK-LABEL: @PR45762( -; CHECK-NEXT: [[T4:%.*]] = call i3 @llvm.cttz.i3(i3 [[X4:%.*]], i1 false), 
!range !2 +; CHECK-NEXT: [[T4:%.*]] = call i3 @llvm.cttz.i3(i3 [[X4:%.*]], i1 false), [[RNG2:!range !.*]] ; CHECK-NEXT: [[T7:%.*]] = zext i3 [[T4]] to i4 ; CHECK-NEXT: [[ONE_HOT_16:%.*]] = shl i4 1, [[T7]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i3 [[X4]], 0 -; CHECK-NEXT: [[UMUL_23:%.*]] = select i1 [[TMP1]], i4 0, i4 [[T7]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i3 [[X4]], 0 +; CHECK-NEXT: [[UMUL_23:%.*]] = select i1 [[DOTNOT]], i4 0, i4 [[T7]] ; CHECK-NEXT: [[SEL_71:%.*]] = shl i4 [[ONE_HOT_16]], [[UMUL_23]] ; CHECK-NEXT: ret i4 [[SEL_71]] ; @@ -246,3 +246,32 @@ define i4 @PR45762(i3 %x4) { %sel_71 = select i1 %t12, i4 %one_hot_16, i4 %umul_23 ret i4 %sel_71 } + +define i4 @PR45762_logical(i3 %x4) { +; CHECK-LABEL: @PR45762_logical( +; CHECK-NEXT: [[T4:%.*]] = call i3 @llvm.cttz.i3(i3 [[X4:%.*]], i1 false), [[RNG2]] +; CHECK-NEXT: [[T7:%.*]] = zext i3 [[T4]] to i4 +; CHECK-NEXT: [[ONE_HOT_16:%.*]] = shl i4 1, [[T7]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i3 [[X4]], 0 +; CHECK-NEXT: [[UMUL_23:%.*]] = select i1 [[DOTNOT]], i4 0, i4 [[T7]] +; CHECK-NEXT: [[SEL_71:%.*]] = shl i4 [[ONE_HOT_16]], [[UMUL_23]] +; CHECK-NEXT: ret i4 [[SEL_71]] +; + %t4 = call i3 @llvm.cttz.i3(i3 %x4, i1 false) + %t5 = icmp eq i3 %x4, 0 + %t6 = select i1 %t5, i3 3, i3 %t4 + %t7 = zext i3 %t6 to i4 + %one_hot_16 = shl i4 1, %t7 + %t8 = lshr i4 %one_hot_16, 0 + %bit_slice_61 = trunc i4 %t8 to i1 + %t9 = lshr i4 %one_hot_16, 1 + %bit_slice_62 = trunc i4 %t9 to i1 + %t10 = lshr i4 %one_hot_16, 2 + %bit_slice_64 = trunc i4 %t10 to i1 + %t11 = select i1 %bit_slice_61, i1 true, i1 %bit_slice_62 + %or_69 = select i1 %t11, i1 true, i1 %bit_slice_64 + %umul_23 = mul i4 %one_hot_16, %one_hot_16 + %t12 = icmp eq i1 %or_69, false + %sel_71 = select i1 %t12, i4 %one_hot_16, i4 %umul_23 + ret i4 %sel_71 +} diff --git a/llvm/test/Transforms/InstCombine/select-imm-canon.ll b/llvm/test/Transforms/InstCombine/select-imm-canon.ll index 272d4a47ce6887..e230b3b9277749 100644 --- 
a/llvm/test/Transforms/InstCombine/select-imm-canon.ll +++ b/llvm/test/Transforms/InstCombine/select-imm-canon.ll @@ -5,9 +5,9 @@ define i8 @single(i32 %A) { ; CHECK-LABEL: @single( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[A:%.*]], -128 -; CHECK-NEXT: [[L2:%.*]] = select i1 [[TMP0]], i32 [[A]], i32 -128 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[L2]] to i8 -; CHECK-NEXT: ret i8 [[CONV7]] +; CHECK-NEXT: [[CONV71:%.*]] = select i1 [[TMP0]], i32 [[A]], i32 -128 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[CONV71]] to i8 +; CHECK-NEXT: ret i8 [[TMP1]] ; entry: %l1 = icmp slt i32 %A, -128 @@ -20,11 +20,11 @@ define i8 @double(i32 %A) { ; CHECK-LABEL: @double( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[A:%.*]], -128 -; CHECK-NEXT: [[L2:%.*]] = select i1 [[TMP0]], i32 [[A]], i32 -128 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[L2]], 127 -; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[TMP1]], i32 [[L2]], i32 127 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[SPEC_SELECT_I]] to i8 -; CHECK-NEXT: ret i8 [[CONV7]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[A]], i32 -128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 127 +; CHECK-NEXT: [[CONV71:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 127 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[CONV71]] to i8 +; CHECK-NEXT: ret i8 [[TMP3]] ; entry: %l1 = icmp slt i32 %A, -128 @@ -68,3 +68,22 @@ define i8 @original(i32 %A, i32 %B) { %conv7 = trunc i32 %spec.select.i to i8 ret i8 %conv7 } + +define i8 @original_logical(i32 %A, i32 %B) { +; CHECK-LABEL: @original_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -128 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127 +; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127 +; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[SPEC_SELECT_I]] to i8 +; CHECK-NEXT: ret i8 [[CONV7]] +; + %cmp4.i = icmp slt i32 127, %A + %cmp6.i = 
icmp sle i32 -128, %A + %retval.0.i = select i1 %cmp4.i, i32 127, i32 -128 + %not.cmp4.i = xor i1 %cmp4.i, true + %cleanup.dest.slot.0.i = select i1 %cmp6.i, i1 %not.cmp4.i, i1 false + %spec.select.i = select i1 %cleanup.dest.slot.0.i, i32 %A, i32 %retval.0.i + %conv7 = trunc i32 %spec.select.i to i8 + ret i8 %conv7 +} diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 819ac6dd3a82de..d603de371ba020 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -70,7 +70,7 @@ define <2 x i1> @test8vec(<2 x i1> %C, <2 x i1> %X) { define @test8vvec( %C, %X) { ; CHECK-LABEL: @test8vvec( -; CHECK-NEXT: [[R:%.*]] = and [[C:%.*]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = and [[C:%.*]], [[X:%.*]] ; CHECK-NEXT: ret [[R]] ; %R = select %C, %X, zeroinitializer @@ -501,6 +501,27 @@ ret: ret i32 %b } +define i32 @test26_logical(i1 %cond) { +; CHECK-LABEL: @test26_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[JUMP:%.*]], label [[RET:%.*]] +; CHECK: jump: +; CHECK-NEXT: br label [[RET]] +; CHECK: ret: +; CHECK-NEXT: [[B:%.*]] = phi i32 [ 10, [[JUMP]] ], [ 20, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[B]] +; +entry: + br i1 %cond, label %jump, label %ret +jump: + %c = select i1 false, i1 true, i1 false + br label %ret +ret: + %a = phi i1 [true, %entry], [%c, %jump] + %b = select i1 %a, i32 20, i32 10 + ret i32 %b +} + define i32 @test27(i1 %c, i32 %A, i32 %B) { ; CHECK-LABEL: @test27( ; CHECK-NEXT: entry: @@ -720,9 +741,9 @@ define i48 @test51(<3 x i1> %icmp, <3 x i16> %tmp) { define @bitcast_select_bitcast( %icmp, %a, %b) { ; CHECK-LABEL: @bitcast_select_bitcast( -; CHECK-NEXT: [[BC1:%.*]] = bitcast [[A:%.*]] to -; CHECK-NEXT: [[SELECT:%.*]] = select [[ICMP:%.*]], [[B:%.*]], [[BC1]] -; CHECK-NEXT: ret [[SELECT]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [[A:%.*]] to +; CHECK-NEXT: [[BC2:%.*]] = select [[ICMP:%.*]], [[B:%.*]], [[TMP1]] +; CHECK-NEXT: ret [[BC2]] 
; %bc1 = bitcast %b to %select = select %icmp, %bc1, %a diff --git a/llvm/test/Transforms/InstCombine/set.ll b/llvm/test/Transforms/InstCombine/set.ll index b8c349aaf94e48..bc2cdb7c469e9a 100644 --- a/llvm/test/Transforms/InstCombine/set.ll +++ b/llvm/test/Transforms/InstCombine/set.ll @@ -15,6 +15,17 @@ define i1 @test1(i32 %A) { ret i1 %D } +define i1 @test1_logical(i32 %A) { +; CHECK-LABEL: @test1_logical( +; CHECK-NEXT: ret i1 false +; + %B = icmp eq i32 %A, %A + ; Never true + %C = icmp eq i32* @X, null + %D = select i1 %B, i1 %C, i1 false + ret i1 %D +} + define i1 @test2(i32 %A) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: ret i1 true @@ -26,6 +37,17 @@ define i1 @test2(i32 %A) { ret i1 %D } +define i1 @test2_logical(i32 %A) { +; CHECK-LABEL: @test2_logical( +; CHECK-NEXT: ret i1 true +; + %B = icmp ne i32 %A, %A + ; Never false + %C = icmp ne i32* @X, null + %D = select i1 %B, i1 true, i1 %C + ret i1 %D +} + define i1 @test3(i32 %A) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: ret i1 false @@ -160,6 +182,18 @@ define i1 @bool_eq0(i64 %a) { ret i1 %and } +define i1 @bool_eq0_logical(i64 %a) { +; CHECK-LABEL: @bool_eq0_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %b = icmp sgt i64 %a, 0 + %c = icmp eq i64 %a, 1 + %notc = icmp eq i1 %c, false + %and = select i1 %b, i1 %notc, i1 false + ret i1 %and +} + ; This is equivalent to the previous test. 
define i1 @xor_of_icmps(i64 %a) { @@ -207,8 +241,8 @@ define i32 @PR2844(i32 %x) { ; CHECK-LABEL: @PR2844( ; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0 ; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], -638208502 -; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[A]], [[B]] -; CHECK-NEXT: [[SEL:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: [[NOT_OR:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: [[SEL:%.*]] = zext i1 [[NOT_OR]] to i32 ; CHECK-NEXT: ret i32 [[SEL]] ; %A = icmp eq i32 %x, 0 @@ -218,6 +252,21 @@ define i32 @PR2844(i32 %x) { ret i32 %sel } +define i32 @PR2844_logical(i32 %x) { +; CHECK-LABEL: @PR2844_logical( +; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0 +; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], -638208502 +; CHECK-NEXT: [[NOT_OR:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: [[SEL:%.*]] = zext i1 [[NOT_OR]] to i32 +; CHECK-NEXT: ret i32 [[SEL]] +; + %A = icmp eq i32 %x, 0 + %B = icmp slt i32 %x, -638208501 + %or = select i1 %A, i1 true, i1 %B + %sel = select i1 %or, i32 0, i32 1 + ret i32 %sel +} + define i1 @test16(i32 %A) { ; CHECK-LABEL: @test16( ; CHECK-NEXT: ret i1 false @@ -284,8 +333,8 @@ define i32 @test20(i32 %A) { define <2 x i32> @test20vec(<2 x i32> %A) { ; CHECK-LABEL: @test20vec( -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: ret <2 x i32> [[B]] +; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i32> [[D]] ; %B = and <2 x i32> %A, %C = icmp ne <2 x i32> %B, zeroinitializer @@ -329,6 +378,18 @@ define i1 @test22(i32 %A, i32 %X) { ret i1 %R } +define i1 @test22_logical(i32 %A, i32 %X) { +; CHECK-LABEL: @test22_logical( +; CHECK-NEXT: ret i1 true +; + %B = and i32 %A, 100663295 + %C = icmp ult i32 %B, 268435456 + %Y = and i32 %X, 7 + %Z = icmp sgt i32 %Y, -1 + %R = select i1 %C, i1 true, i1 %Z + ret i1 %R +} + define i32 @test23(i32 %a) { ; CHECK-LABEL: @test23( ; CHECK-NEXT: [[TMP_1:%.*]] = and i32 [[A:%.*]], 1 @@ -355,10 +416,10 @@ define <2 x i32> @test23vec(<2 x i32> %a) { define i32 @test24(i32 %a) 
{ ; CHECK-LABEL: @test24( -; CHECK-NEXT: [[TMP_1:%.*]] = lshr i32 [[A:%.*]], 2 -; CHECK-NEXT: [[TMP_1_LOBIT:%.*]] = and i32 [[TMP_1]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP_1_LOBIT]], 1 -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], 2 +; CHECK-NEXT: [[DOTLOBIT:%.*]] = and i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[DOTLOBIT]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %tmp1 = and i32 %a, 4 %tmp.1 = lshr i32 %tmp1, 2 @@ -369,10 +430,10 @@ define i32 @test24(i32 %a) { define <2 x i32> @test24vec(<2 x i32> %a) { ; CHECK-LABEL: @test24vec( -; CHECK-NEXT: [[TMP_1:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP_1_LOBIT:%.*]] = and <2 x i32> [[TMP_1]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[TMP_1_LOBIT]], -; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[DOTLOBIT:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[DOTLOBIT]], +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %tmp1 = and <2 x i32> %a, %tmp.1 = lshr <2 x i32> %tmp1, diff --git a/llvm/test/Transforms/InstCombine/sign-test-and-or.ll b/llvm/test/Transforms/InstCombine/sign-test-and-or.ll index 1920a800ef1f08..a71cb54b9fc29d 100644 --- a/llvm/test/Transforms/InstCombine/sign-test-and-or.ll +++ b/llvm/test/Transforms/InstCombine/sign-test-and-or.ll @@ -5,7 +5,7 @@ declare void @foo() define i1 @test1(i32 %a, i32 %b) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -15,9 +15,21 @@ define i1 @test1(i32 %a, i32 %b) { ret i1 %or.cond } +define i1 @test1_logical(i32 %a, i32 %b) { +; CHECK-LABEL: @test1_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = icmp slt i32 %a, 0 + %2 = icmp slt i32 %b, 0 + 
%or.cond = select i1 %1, i1 true, i1 %2 + ret i1 %or.cond +} + define i1 @test2(i32 %a, i32 %b) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -27,9 +39,21 @@ define i1 @test2(i32 %a, i32 %b) { ret i1 %or.cond } +define i1 @test2_logical(i32 %a, i32 %b) { +; CHECK-LABEL: @test2_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = icmp sgt i32 %a, -1 + %2 = icmp sgt i32 %b, -1 + %or.cond = select i1 %1, i1 true, i1 %2 + ret i1 %or.cond +} + define i1 @test3(i32 %a, i32 %b) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -39,9 +63,21 @@ define i1 @test3(i32 %a, i32 %b) { ret i1 %or.cond } +define i1 @test3_logical(i32 %a, i32 %b) { +; CHECK-LABEL: @test3_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = icmp slt i32 %a, 0 + %2 = icmp slt i32 %b, 0 + %or.cond = select i1 %1, i1 %2, i1 false + ret i1 %or.cond +} + define i1 @test4(i32 %a, i32 %b) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -51,11 +87,28 @@ define i1 @test4(i32 %a, i32 %b) { ret i1 %or.cond } +define i1 @test4_logical(i32 %a, i32 %b) { +; CHECK-LABEL: @test4_logical( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = icmp sgt i32 %a, -1 + %2 = icmp sgt i32 %b, -1 + %or.cond 
= select i1 %1, i1 %2, i1 false + ret i1 %or.cond +} + define void @test5(i32 %a) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -2013265920 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -2013265920 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label %if.then, label %if.end +; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @foo() [[ATTR0:#.*]] +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void ; %and = and i32 %a, 134217728 %1 = icmp eq i32 %and, 0 @@ -64,6 +117,32 @@ define void @test5(i32 %a) { br i1 %or.cond, label %if.then, label %if.end +if.then: + tail call void @foo() nounwind + ret void + +if.end: + ret void +} + +define void @test5_logical(i32 %a) { +; CHECK-LABEL: @test5_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -2013265920 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @foo() [[ATTR0]] +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; + %and = and i32 %a, 134217728 + %1 = icmp eq i32 %and, 0 + %2 = icmp sgt i32 %a, -1 + %or.cond = select i1 %1, i1 %2, i1 false + br i1 %or.cond, label %if.then, label %if.end + + if.then: tail call void @foo() nounwind ret void @@ -74,9 +153,14 @@ if.end: define void @test6(i32 %a) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -2013265920 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -2013265920 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label %if.then, label %if.end +; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @foo() [[ATTR0]] +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void ; %1 = icmp sgt i32 %a, -1 %and = and i32 %a, 134217728 @@ -85,6 +169,32 @@ 
define void @test6(i32 %a) { br i1 %or.cond, label %if.then, label %if.end +if.then: + tail call void @foo() nounwind + ret void + +if.end: + ret void +} + +define void @test6_logical(i32 %a) { +; CHECK-LABEL: @test6_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -2013265920 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @foo() [[ATTR0]] +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; + %1 = icmp sgt i32 %a, -1 + %and = and i32 %a, 134217728 + %2 = icmp eq i32 %and, 0 + %or.cond = select i1 %1, i1 %2, i1 false + br i1 %or.cond, label %if.then, label %if.end + + if.then: tail call void @foo() nounwind ret void @@ -95,9 +205,14 @@ if.end: define void @test7(i32 %a) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -2013265920 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label %if.end, label %if.then +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -2013265920 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @foo() [[ATTR0]] +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void ; %and = and i32 %a, 134217728 %1 = icmp ne i32 %and, 0 @@ -106,6 +221,32 @@ define void @test7(i32 %a) { br i1 %or.cond, label %if.then, label %if.end +if.then: + tail call void @foo() nounwind + ret void + +if.end: + ret void +} + +define void @test7_logical(i32 %a) { +; CHECK-LABEL: @test7_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -2013265920 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @foo() [[ATTR0]] +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; + %and = and 
i32 %a, 134217728 + %1 = icmp ne i32 %and, 0 + %2 = icmp slt i32 %a, 0 + %or.cond = select i1 %1, i1 true, i1 %2 + br i1 %or.cond, label %if.then, label %if.end + + if.then: tail call void @foo() nounwind ret void @@ -116,9 +257,14 @@ if.end: define void @test8(i32 %a) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -2013265920 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label %if.end, label %if.then +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -2013265920 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @foo() +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void ; %1 = icmp slt i32 %a, 0 %and = and i32 %a, 134217728 @@ -127,6 +273,32 @@ define void @test8(i32 %a) { br i1 %or.cond, label %if.then, label %if.end +if.then: + tail call void @foo() + ret void + +if.end: + ret void +} + +define void @test8_logical(i32 %a) { +; CHECK-LABEL: @test8_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -2013265920 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @foo() +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; + %1 = icmp slt i32 %a, 0 + %and = and i32 %a, 134217728 + %2 = icmp ne i32 %and, 0 + %or.cond = select i1 %1, i1 true, i1 %2 + br i1 %or.cond, label %if.then, label %if.end + + if.then: tail call void @foo() ret void @@ -137,7 +309,7 @@ if.end: define i1 @test9(i32 %a) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -1073741824 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -1073741824 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1073741824 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -148,9 +320,22 @@ define i1 @test9(i32 %a) { ret i1 %or.cond } +define i1 @test9_logical(i32 %a) { 
+; CHECK-LABEL: @test9_logical( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -1073741824 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1073741824 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = and i32 %a, 1073741824 + %2 = icmp ne i32 %1, 0 + %3 = icmp sgt i32 %a, -1 + %or.cond = select i1 %2, i1 %3, i1 false + ret i1 %or.cond +} + define i1 @test10(i32 %a) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %a, 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 ; CHECK-NEXT: ret i1 [[TMP1]] ; %1 = and i32 %a, 2 @@ -160,9 +345,21 @@ define i1 @test10(i32 %a) { ret i1 %or.cond } +define i1 @test10_logical(i32 %a) { +; CHECK-LABEL: @test10_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %1 = and i32 %a, 2 + %2 = icmp eq i32 %1, 0 + %3 = icmp ult i32 %a, 4 + %or.cond = select i1 %2, i1 %3, i1 false + ret i1 %or.cond +} + define i1 @test11(i32 %a) { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 %a, 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 ; CHECK-NEXT: ret i1 [[TMP1]] ; %1 = and i32 %a, 2 @@ -171,3 +368,15 @@ define i1 @test11(i32 %a) { %or.cond = or i1 %2, %3 ret i1 %or.cond } + +define i1 @test11_logical(i32 %a) { +; CHECK-LABEL: @test11_logical( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %1 = and i32 %a, 2 + %2 = icmp ne i32 %1, 0 + %3 = icmp ugt i32 %a, 3 + %or.cond = select i1 %2, i1 true, i1 %3 + ret i1 %or.cond +} diff --git a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll index a69129cbcd22db..62a62b97e90d25 100644 --- a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll @@ -48,6 +48,18 @@ define i1 @positive_with_signbit(i32 %arg) { ret i1 %t4 } +define i1 @positive_with_signbit_logical(i32 %arg) { +; CHECK-LABEL: @positive_with_signbit_logical( +; 
CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128 +; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]] +; + %t1 = icmp sgt i32 %arg, -1 + %t2 = add i32 %arg, 128 + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t1, i1 %t3, i1 false + ret i1 %t4 +} + define i1 @positive_with_mask(i32 %arg) { ; CHECK-LABEL: @positive_with_mask( ; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128 @@ -61,6 +73,19 @@ define i1 @positive_with_mask(i32 %arg) { ret i1 %t5 } +define i1 @positive_with_mask_logical(i32 %arg) { +; CHECK-LABEL: @positive_with_mask_logical( +; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128 +; CHECK-NEXT: ret i1 [[T5_SIMPLIFIED]] +; + %t1 = and i32 %arg, 1107296256 + %t2 = icmp eq i32 %t1, 0 + %t3 = add i32 %arg, 128 + %t4 = icmp ult i32 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @positive_with_icmp(i32 %arg) { ; CHECK-LABEL: @positive_with_icmp( ; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128 @@ -73,6 +98,18 @@ define i1 @positive_with_icmp(i32 %arg) { ret i1 %t4 } +define i1 @positive_with_icmp_logical(i32 %arg) { +; CHECK-LABEL: @positive_with_icmp_logical( +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128 +; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]] +; + %t1 = icmp ult i32 %arg, 512 + %t2 = add i32 %arg, 128 + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t1, i1 %t3, i1 false + ret i1 %t4 +} + ; Still the same define i1 @positive_with_aggressive_icmp(i32 %arg) { ; CHECK-LABEL: @positive_with_aggressive_icmp( @@ -86,6 +123,18 @@ define i1 @positive_with_aggressive_icmp(i32 %arg) { ret i1 %t4 } +define i1 @positive_with_aggressive_icmp_logical(i32 %arg) { +; CHECK-LABEL: @positive_with_aggressive_icmp_logical( +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128 +; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]] +; + %t1 = icmp ult i32 %arg, 128 + %t2 = add i32 %arg, 256 + %t3 = icmp ult i32 %t2, 512 + %t4 = select i1 %t1, i1 %t3, i1 false + ret i1 
%t4 +} + ; I'm sure there is a bunch more patterns possible :/ ; This used to trigger an assert, because the icmp's are not direct @@ -104,6 +153,20 @@ define i1 @positive_with_extra_and(i32 %arg, i1 %z) { ret i1 %t5 } +define i1 @positive_with_extra_and_logical(i32 %arg, i1 %z) { +; CHECK-LABEL: @positive_with_extra_and_logical( +; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128 +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[T5_SIMPLIFIED]], [[Z:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %t1 = icmp sgt i32 %arg, -1 + %t2 = add i32 %arg, 128 + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t1, i1 %z, i1 false + %t5 = select i1 %t3, i1 %t4, i1 false + ret i1 %t5 +} + ; ============================================================================ ; ; Vector tests ; ============================================================================ ; @@ -260,6 +323,20 @@ define i1 @commutative() { ret i1 %t4 } +define i1 @commutative_logical() { +; CHECK-LABEL: @commutative_logical( +; CHECK-NEXT: [[ARG:%.*]] = call i32 @gen32() +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128 +; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]] +; + %arg = call i32 @gen32() + %t1 = icmp sgt i32 %arg, -1 + %t2 = add i32 %arg, 128 + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t3, i1 %t1, i1 false ; swapped order + ret i1 %t4 +} + define i1 @commutative_with_icmp() { ; CHECK-LABEL: @commutative_with_icmp( ; CHECK-NEXT: [[ARG:%.*]] = call i32 @gen32() @@ -274,6 +351,20 @@ define i1 @commutative_with_icmp() { ret i1 %t4 } +define i1 @commutative_with_icmp_logical() { +; CHECK-LABEL: @commutative_with_icmp_logical( +; CHECK-NEXT: [[ARG:%.*]] = call i32 @gen32() +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128 +; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]] +; + %arg = call i32 @gen32() + %t1 = icmp ult i32 %arg, 512 + %t2 = add i32 %arg, 128 + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t3, i1 %t1, i1 false ; swapped order + ret i1 %t4 +} + ; 
============================================================================ ; ; Truncations. ; ============================================================================ ; @@ -291,6 +382,19 @@ define i1 @positive_trunc_signbit(i32 %arg) { ret i1 %t5 } +define i1 @positive_trunc_signbit_logical(i32 %arg) { +; CHECK-LABEL: @positive_trunc_signbit_logical( +; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128 +; CHECK-NEXT: ret i1 [[T5_SIMPLIFIED]] +; + %t1 = trunc i32 %arg to i8 + %t2 = icmp sgt i8 %t1, -1 + %t3 = add i32 %arg, 128 + %t4 = icmp ult i32 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @positive_trunc_base(i32 %arg) { ; CHECK-LABEL: @positive_trunc_base( ; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i16 @@ -305,6 +409,20 @@ define i1 @positive_trunc_base(i32 %arg) { ret i1 %t5 } +define i1 @positive_trunc_base_logical(i32 %arg) { +; CHECK-LABEL: @positive_trunc_base_logical( +; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i16 +; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i16 [[T1]], 128 +; CHECK-NEXT: ret i1 [[T5_SIMPLIFIED]] +; + %t1 = trunc i32 %arg to i16 + %t2 = icmp sgt i16 %t1, -1 + %t3 = add i16 %t1, 128 + %t4 = icmp ult i16 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @positive_different_trunc_both(i32 %arg) { ; CHECK-LABEL: @positive_different_trunc_both( ; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i15 @@ -324,6 +442,25 @@ define i1 @positive_different_trunc_both(i32 %arg) { ret i1 %t6 } +define i1 @positive_different_trunc_both_logical(i32 %arg) { +; CHECK-LABEL: @positive_different_trunc_both_logical( +; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i15 +; CHECK-NEXT: [[T2:%.*]] = icmp sgt i15 [[T1]], -1 +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[ARG]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[T3]], 128 +; CHECK-NEXT: [[T5:%.*]] = icmp ult i16 [[T4]], 256 +; CHECK-NEXT: [[T6:%.*]] = and i1 [[T2]], [[T5]] +; CHECK-NEXT: ret i1 [[T6]] +; + 
%t1 = trunc i32 %arg to i15 + %t2 = icmp sgt i15 %t1, -1 + %t3 = trunc i32 %arg to i16 + %t4 = add i16 %t3, 128 + %t5 = icmp ult i16 %t4, 256 + %t6 = select i1 %t2, i1 %t5, i1 false + ret i1 %t6 +} + ; ============================================================================ ; ; One-use tests. ; @@ -357,6 +494,27 @@ define i1 @oneuse_with_signbit(i32 %arg) { ret i1 %t4 } +define i1 @oneuse_with_signbit_logical(i32 %arg) { +; CHECK-LABEL: @oneuse_with_signbit_logical( +; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 +; CHECK-NEXT: call void @use1(i1 [[T1]]) +; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 128 +; CHECK-NEXT: call void @use32(i32 [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256 +; CHECK-NEXT: call void @use1(i1 [[T3]]) +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128 +; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]] +; + %t1 = icmp sgt i32 %arg, -1 + call void @use1(i1 %t1) + %t2 = add i32 %arg, 128 + call void @use32(i32 %t2) + %t3 = icmp ult i32 %t2, 256 + call void @use1(i1 %t3) + %t4 = select i1 %t1, i1 %t3, i1 false + ret i1 %t4 +} + define i1 @oneuse_with_mask(i32 %arg) { ; CHECK-LABEL: @oneuse_with_mask( ; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 603979776 @@ -382,6 +540,31 @@ define i1 @oneuse_with_mask(i32 %arg) { ret i1 %t5 } +define i1 @oneuse_with_mask_logical(i32 %arg) { +; CHECK-LABEL: @oneuse_with_mask_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 603979776 +; CHECK-NEXT: call void @use32(i32 [[T1]]) +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: call void @use1(i1 [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128 +; CHECK-NEXT: call void @use32(i32 [[T3]]) +; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 +; CHECK-NEXT: call void @use1(i1 [[T4]]) +; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128 +; CHECK-NEXT: ret i1 [[T5_SIMPLIFIED]] +; + %t1 = and i32 %arg, 603979776 ; some bit within the target 4294967168 mask. 
+ call void @use32(i32 %t1) + %t2 = icmp eq i32 %t1, 0 + call void @use1(i1 %t2) + %t3 = add i32 %arg, 128 + call void @use32(i32 %t3) + %t4 = icmp ult i32 %t3, 256 + call void @use1(i1 %t4) + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @oneuse_shl_ashr(i32 %arg) { ; CHECK-LABEL: @oneuse_shl_ashr( ; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8 @@ -411,6 +594,35 @@ define i1 @oneuse_shl_ashr(i32 %arg) { ret i1 %t6 } +define i1 @oneuse_shl_ashr_logical(i32 %arg) { +; CHECK-LABEL: @oneuse_shl_ashr_logical( +; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8 +; CHECK-NEXT: call void @use8(i8 [[T1]]) +; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[T1]], -1 +; CHECK-NEXT: call void @use1(i1 [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[ARG]], 24 +; CHECK-NEXT: call void @use32(i32 [[T3]]) +; CHECK-NEXT: [[T4:%.*]] = ashr exact i32 [[T3]], 24 +; CHECK-NEXT: call void @use32(i32 [[T4]]) +; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], [[ARG]] +; CHECK-NEXT: call void @use1(i1 [[T5]]) +; CHECK-NEXT: [[T6:%.*]] = and i1 [[T2]], [[T5]] +; CHECK-NEXT: ret i1 [[T6]] +; + %t1 = trunc i32 %arg to i8 + call void @use8(i8 %t1) + %t2 = icmp sgt i8 %t1, -1 + call void @use1(i1 %t2) + %t3 = shl i32 %arg, 24 + call void @use32(i32 %t3) + %t4 = ashr i32 %t3, 24 + call void @use32(i32 %t4) + %t5 = icmp eq i32 %t4, %arg + call void @use1(i1 %t5) + %t6 = select i1 %t2, i1 %t5, i1 false + ret i1 %t6 +} + define zeroext i1 @oneuse_trunc_sext(i32 %arg) { ; CHECK-LABEL: @oneuse_trunc_sext( ; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8 @@ -440,6 +652,35 @@ define zeroext i1 @oneuse_trunc_sext(i32 %arg) { ret i1 %t6 } +define zeroext i1 @oneuse_trunc_sext_logical(i32 %arg) { +; CHECK-LABEL: @oneuse_trunc_sext_logical( +; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8 +; CHECK-NEXT: call void @use8(i8 [[T1]]) +; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[T1]], -1 +; CHECK-NEXT: call void @use1(i1 [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[ARG]] to i8 +; 
CHECK-NEXT: call void @use8(i8 [[T3]]) +; CHECK-NEXT: [[T4:%.*]] = sext i8 [[T3]] to i32 +; CHECK-NEXT: call void @use32(i32 [[T4]]) +; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], [[ARG]] +; CHECK-NEXT: call void @use1(i1 [[T5]]) +; CHECK-NEXT: [[T6:%.*]] = and i1 [[T2]], [[T5]] +; CHECK-NEXT: ret i1 [[T6]] +; + %t1 = trunc i32 %arg to i8 + call void @use8(i8 %t1) + %t2 = icmp sgt i8 %t1, -1 + call void @use1(i1 %t2) + %t3 = trunc i32 %arg to i8 + call void @use8(i8 %t3) + %t4 = sext i8 %t3 to i32 + call void @use32(i32 %t4) + %t5 = icmp eq i32 %t4, %arg + call void @use1(i1 %t5) + %t6 = select i1 %t2, i1 %t5, i1 false + ret i1 %t6 +} + ; ============================================================================ ; ; Negative tests ; ============================================================================ ; @@ -459,6 +700,21 @@ define i1 @negative_not_arg(i32 %arg, i32 %arg2) { ret i1 %t4 } +define i1 @negative_not_arg_logical(i32 %arg, i32 %arg2) { +; CHECK-LABEL: @negative_not_arg_logical( +; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 +; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG2:%.*]], 128 +; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256 +; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]] +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = icmp sgt i32 %arg, -1 + %t2 = add i32 %arg2, 128 ; not %arg + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t1, i1 %t3, i1 false + ret i1 %t4 +} + define i1 @negative_trunc_not_arg(i32 %arg, i32 %arg2) { ; CHECK-LABEL: @negative_trunc_not_arg( ; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8 @@ -476,6 +732,23 @@ define i1 @negative_trunc_not_arg(i32 %arg, i32 %arg2) { ret i1 %t5 } +define i1 @negative_trunc_not_arg_logical(i32 %arg, i32 %arg2) { +; CHECK-LABEL: @negative_trunc_not_arg_logical( +; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8 +; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[T1]], -1 +; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG2:%.*]], 128 +; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 +; 
CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = trunc i32 %arg to i8 + %t2 = icmp sgt i8 %t1, -1 + %t3 = add i32 %arg2, 128 ; not %arg + %t4 = icmp ult i32 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @positive_with_mask_not_arg(i32 %arg, i32 %arg2) { ; CHECK-LABEL: @positive_with_mask_not_arg( ; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1140850688 @@ -493,6 +766,23 @@ define i1 @positive_with_mask_not_arg(i32 %arg, i32 %arg2) { ret i1 %t5 } +define i1 @positive_with_mask_not_arg_logical(i32 %arg, i32 %arg2) { +; CHECK-LABEL: @positive_with_mask_not_arg_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1140850688 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG2:%.*]], 128 +; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %arg, 1140850688 + %t2 = icmp eq i32 %t1, 0 + %t3 = add i32 %arg2, 128 ; not %arg + %t4 = icmp ult i32 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @negative_with_nonuniform_bad_mask(i32 %arg) { ; CHECK-LABEL: @negative_with_nonuniform_bad_mask( ; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1711276033 @@ -510,6 +800,23 @@ define i1 @negative_with_nonuniform_bad_mask(i32 %arg) { ret i1 %t5 } +define i1 @negative_with_nonuniform_bad_mask_logical(i32 %arg) { +; CHECK-LABEL: @negative_with_nonuniform_bad_mask_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1711276033 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128 +; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %arg, 1711276033 ; lowest bit is set + %t2 = icmp eq i32 %t1, 0 + %t3 = add i32 %arg, 128 + %t4 = icmp ult i32 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} 
+ define i1 @negative_with_uniform_bad_mask(i32 %arg) { ; CHECK-LABEL: @negative_with_uniform_bad_mask( ; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], -16777152 @@ -527,6 +834,23 @@ define i1 @negative_with_uniform_bad_mask(i32 %arg) { ret i1 %t5 } +define i1 @negative_with_uniform_bad_mask_logical(i32 %arg) { +; CHECK-LABEL: @negative_with_uniform_bad_mask_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], -16777152 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128 +; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %arg, 4278190144 ; 7'th bit is set + %t2 = icmp eq i32 %t1, 0 + %t3 = add i32 %arg, 128 + %t4 = icmp ult i32 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @negative_with_wrong_mask(i32 %arg) { ; CHECK-LABEL: @negative_with_wrong_mask( ; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1 @@ -544,6 +868,23 @@ define i1 @negative_with_wrong_mask(i32 %arg) { ret i1 %t5 } +define i1 @negative_with_wrong_mask_logical(i32 %arg) { +; CHECK-LABEL: @negative_with_wrong_mask_logical( +; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128 +; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = and i32 %arg, 1 ; not even checking the right mask + %t2 = icmp eq i32 %t1, 0 + %t3 = add i32 %arg, 128 + %t4 = icmp ult i32 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @negative_not_less_than(i32 %arg) { ; CHECK-LABEL: @negative_not_less_than( ; CHECK-NEXT: ret i1 false @@ -555,6 +896,17 @@ define i1 @negative_not_less_than(i32 %arg) { ret i1 %t4 } +define i1 @negative_not_less_than_logical(i32 %arg) { +; CHECK-LABEL: @negative_not_less_than_logical( +; CHECK-NEXT: ret i1 false +; + 
%t1 = icmp sgt i32 %arg, -1 + %t2 = add i32 %arg, 256 ; should be less than 256 + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t1, i1 %t3, i1 false + ret i1 %t4 +} + define i1 @negative_not_power_of_two(i32 %arg) { ; CHECK-LABEL: @negative_not_power_of_two( ; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 @@ -570,6 +922,21 @@ define i1 @negative_not_power_of_two(i32 %arg) { ret i1 %t4 } +define i1 @negative_not_power_of_two_logical(i32 %arg) { +; CHECK-LABEL: @negative_not_power_of_two_logical( +; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 +; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 255 +; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256 +; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]] +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = icmp sgt i32 %arg, -1 + %t2 = add i32 %arg, 255 ; should be power of two + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t1, i1 %t3, i1 false + ret i1 %t4 +} + define i1 @negative_not_next_power_of_two(i32 %arg) { ; CHECK-LABEL: @negative_not_next_power_of_two( ; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 @@ -585,6 +952,21 @@ define i1 @negative_not_next_power_of_two(i32 %arg) { ret i1 %t4 } +define i1 @negative_not_next_power_of_two_logical(i32 %arg) { +; CHECK-LABEL: @negative_not_next_power_of_two_logical( +; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 +; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 64 +; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256 +; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]] +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = icmp sgt i32 %arg, -1 + %t2 = add i32 %arg, 64 ; should be 256 >> 1 + %t3 = icmp ult i32 %t2, 256 + %t4 = select i1 %t1, i1 %t3, i1 false + ret i1 %t4 +} + ; I don't think this can be folded, at least not into single instruction. 
define i1 @two_signed_truncation_checks(i32 %arg) { ; CHECK-LABEL: @two_signed_truncation_checks( @@ -603,6 +985,23 @@ define i1 @two_signed_truncation_checks(i32 %arg) { ret i1 %t5 } +define i1 @two_signed_truncation_checks_logical(i32 %arg) { +; CHECK-LABEL: @two_signed_truncation_checks_logical( +; CHECK-NEXT: [[T1:%.*]] = add i32 [[ARG:%.*]], 512 +; CHECK-NEXT: [[T2:%.*]] = icmp ult i32 [[T1]], 1024 +; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128 +; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = add i32 %arg, 512 + %t2 = icmp ult i32 %t1, 1024 + %t3 = add i32 %arg, 128 + %t4 = icmp ult i32 %t3, 256 + %t5 = select i1 %t2, i1 %t4, i1 false + ret i1 %t5 +} + define i1 @bad_trunc_stc(i32 %arg) { ; CHECK-LABEL: @bad_trunc_stc( ; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 @@ -619,3 +1018,20 @@ define i1 @bad_trunc_stc(i32 %arg) { %t5 = and i1 %t1, %t4 ret i1 %t5 } + +define i1 @bad_trunc_stc_logical(i32 %arg) { +; CHECK-LABEL: @bad_trunc_stc_logical( +; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 +; CHECK-NEXT: [[T2:%.*]] = trunc i32 [[ARG]] to i16 +; CHECK-NEXT: [[T3:%.*]] = add i16 [[T2]], 128 +; CHECK-NEXT: [[T4:%.*]] = icmp ult i16 [[T3]], 256 +; CHECK-NEXT: [[T5:%.*]] = and i1 [[T1]], [[T4]] +; CHECK-NEXT: ret i1 [[T5]] +; + %t1 = icmp sgt i32 %arg, -1 ; checks a bit outside of the i16 + %t2 = trunc i32 %arg to i16 + %t3 = add i16 %t2, 128 + %t4 = icmp ult i16 %t3, 256 + %t5 = select i1 %t1, i1 %t4, i1 false + ret i1 %t5 +} diff --git a/llvm/test/Transforms/InstCombine/umul-sign-check.ll b/llvm/test/Transforms/InstCombine/umul-sign-check.ll index aae1cbe3cd38b8..8fa659396f2e1f 100644 --- a/llvm/test/Transforms/InstCombine/umul-sign-check.ll +++ b/llvm/test/Transforms/InstCombine/umul-sign-check.ll @@ -30,6 +30,25 @@ define i1 @test1(i64 %a, i64 %b, i64* %ptr) { ret i1 %overflow.1 } +define i1 @test1_logical(i64 %a, i64 %b, i64* %ptr) { +; 
CHECK-LABEL: @test1_logical( +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0 +; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: store i64 [[MUL]], i64* [[PTR:%.*]], align 8 +; CHECK-NEXT: ret i1 [[OVERFLOW_1]] +; + + %res = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %overflow = extractvalue { i64, i1 } %res, 1 + %mul = extractvalue { i64, i1 } %res, 0 + %cmp = icmp ne i64 %mul, 0 + %overflow.1 = select i1 %overflow, i1 true, i1 %cmp + store i64 %mul, i64* %ptr, align 8 + ret i1 %overflow.1 +} + define i1 @test1_or_ops_swapped(i64 %a, i64 %b, i64* %ptr) { ; CHECK-LABEL: @test1_or_ops_swapped( ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]] @@ -50,6 +69,26 @@ define i1 @test1_or_ops_swapped(i64 %a, i64 %b, i64* %ptr) { ret i1 %overflow.1 } +define i1 @test1_or_ops_swapped_logical(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: @test1_or_ops_swapped_logical( +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0 +; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: store i64 [[MUL]], i64* [[PTR:%.*]], align 8 +; CHECK-NEXT: ret i1 [[OVERFLOW_1]] +; + + + %res = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %overflow = extractvalue { i64, i1 } %res, 1 + %mul = extractvalue { i64, i1 } %res, 0 + %cmp = icmp ne i64 %mul, 0 + %overflow.1 = select i1 %cmp, i1 true, i1 %overflow + store i64 %mul, i64* %ptr, align 8 + ret i1 %overflow.1 +} + define i1 @test2(i64 %a, i64 %b, i64* %ptr) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]] @@ -71,6 +110,27 @@ define i1 @test2(i64 %a, i64 %b, i64* %ptr) { ret i1 %overflow.1 } +define i1 @test2_logical(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: @test2_logical( +; CHECK-NEXT: 
[[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0 +; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]] +; CHECK-NEXT: store i64 [[NEG]], i64* [[PTR:%.*]], align 8 +; CHECK-NEXT: ret i1 [[OVERFLOW_1]] +; + + %res = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %overflow = extractvalue { i64, i1 } %res, 1 + %mul = extractvalue { i64, i1 } %res, 0 + %cmp = icmp ne i64 %mul, 0 + %overflow.1 = select i1 %overflow, i1 true, i1 %cmp + %neg = sub i64 0, %mul + store i64 %neg, i64* %ptr, align 8 + ret i1 %overflow.1 +} + declare void @use(i1) define i1 @test3_multiple_overflow_users(i64 %a, i64 %b, i64* %ptr) { @@ -92,6 +152,25 @@ define i1 @test3_multiple_overflow_users(i64 %a, i64 %b, i64* %ptr) { ret i1 %overflow.1 } +define i1 @test3_multiple_overflow_users_logical(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: @test3_multiple_overflow_users_logical( +; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0 +; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: call void @use(i1 [[OVERFLOW]]) +; CHECK-NEXT: ret i1 [[OVERFLOW_1]] +; + %res = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %overflow = extractvalue { i64, i1 } %res, 1 + %mul = extractvalue { i64, i1 } %res, 0 + %cmp = icmp ne i64 %mul, 0 + %overflow.1 = select i1 %overflow, i1 true, i1 %cmp + call void @use(i1 %overflow) + ret i1 %overflow.1 +} + ; Do not simplify if %overflow and %mul have multiple uses. 
define i1 @test3_multiple_overflow_and_mul_users(i64 %a, i64 %b, i64* %ptr) { ; CHECK-LABEL: @test3_multiple_overflow_and_mul_users( @@ -116,6 +195,29 @@ define i1 @test3_multiple_overflow_and_mul_users(i64 %a, i64 %b, i64* %ptr) { ret i1 %overflow.1 } +define i1 @test3_multiple_overflow_and_mul_users_logical(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: @test3_multiple_overflow_and_mul_users_logical( +; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1 +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0 +; CHECK-NEXT: [[OVERFLOW_1:%.*]] = or i1 [[OVERFLOW]], [[CMP]] +; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]] +; CHECK-NEXT: store i64 [[NEG]], i64* [[PTR:%.*]], align 8 +; CHECK-NEXT: call void @use(i1 [[OVERFLOW]]) +; CHECK-NEXT: ret i1 [[OVERFLOW_1]] +; + %res = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %overflow = extractvalue { i64, i1 } %res, 1 + %mul = extractvalue { i64, i1 } %res, 0 + %cmp = icmp ne i64 %mul, 0 + %overflow.1 = select i1 %overflow, i1 true, i1 %cmp + %neg = sub i64 0, %mul + store i64 %neg, i64* %ptr, align 8 + call void @use(i1 %overflow) + ret i1 %overflow.1 +} + declare void @use.2({ i64, i1 }) define i1 @test3_multiple_res_users(i64 %a, i64 %b, i64* %ptr) { @@ -141,6 +243,29 @@ define i1 @test3_multiple_res_users(i64 %a, i64 %b, i64* %ptr) { ret i1 %overflow.1 } +define i1 @test3_multiple_res_users_logical(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: @test3_multiple_res_users_logical( +; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0 +; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]] +; 
CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]] +; CHECK-NEXT: store i64 [[NEG]], i64* [[PTR:%.*]], align 8 +; CHECK-NEXT: call void @use.2({ i64, i1 } [[RES]]) +; CHECK-NEXT: ret i1 [[OVERFLOW_1]] +; + %res = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %overflow = extractvalue { i64, i1 } %res, 1 + %mul = extractvalue { i64, i1 } %res, 0 + %cmp = icmp ne i64 %mul, 0 + %overflow.1 = select i1 %overflow, i1 true, i1 %cmp + %neg = sub i64 0, %mul + store i64 %neg, i64* %ptr, align 8 + call void @use.2({ i64, i1 } %res) + ret i1 %overflow.1 +} + declare void @use.3(i64) ; Simplify if %mul has multiple uses. @@ -167,6 +292,29 @@ define i1 @test3_multiple_mul_users(i64 %a, i64 %b, i64* %ptr) { ret i1 %overflow.1 } +define i1 @test3_multiple_mul_users_logical(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: @test3_multiple_mul_users_logical( +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0 +; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]] +; CHECK-NEXT: store i64 [[NEG]], i64* [[PTR:%.*]], align 8 +; CHECK-NEXT: call void @use.3(i64 [[MUL]]) +; CHECK-NEXT: ret i1 [[OVERFLOW_1]] +; + + %res = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %overflow = extractvalue { i64, i1 } %res, 1 + %mul = extractvalue { i64, i1 } %res, 0 + %cmp = icmp ne i64 %mul, 0 + %overflow.1 = select i1 %overflow, i1 true, i1 %cmp + %neg = sub i64 0, %mul + store i64 %neg, i64* %ptr, align 8 + call void @use.3(i64 %mul) + ret i1 %overflow.1 +} + define i1 @test4_no_icmp_ne(i64 %a, i64 %b, i64* %ptr) { @@ -190,4 +338,25 @@ define i1 @test4_no_icmp_ne(i64 %a, i64 %b, i64* %ptr) { ret i1 %overflow.1 } +define i1 @test4_no_icmp_ne_logical(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: @test4_no_icmp_ne_logical( +; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 
[[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1 +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[MUL]], 0 +; CHECK-NEXT: [[OVERFLOW_1:%.*]] = or i1 [[OVERFLOW]], [[CMP]] +; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]] +; CHECK-NEXT: store i64 [[NEG]], i64* [[PTR:%.*]], align 8 +; CHECK-NEXT: ret i1 [[OVERFLOW_1]] +; + %res = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %overflow = extractvalue { i64, i1 } %res, 1 + %mul = extractvalue { i64, i1 } %res, 0 + %cmp = icmp sgt i64 %mul, 0 + %overflow.1 = select i1 %overflow, i1 true, i1 %cmp + %neg = sub i64 0, %mul + store i64 %neg, i64* %ptr, align 8 + ret i1 %overflow.1 +} + attributes #0 = { nounwind readnone speculatable willreturn } diff --git a/llvm/test/Transforms/InstCombine/usub-overflow-known-by-implied-cond.ll b/llvm/test/Transforms/InstCombine/usub-overflow-known-by-implied-cond.ll index c51ce4fdce5f64..abe6e682761fc0 100644 --- a/llvm/test/Transforms/InstCombine/usub-overflow-known-by-implied-cond.ll +++ b/llvm/test/Transforms/InstCombine/usub-overflow-known-by-implied-cond.ll @@ -6,8 +6,8 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) define i32 @test1(i32 %a, i32 %b) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: br i1 [[COND]], label [[BB3:%.*]], label [[BB1:%.*]] +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[BB3:%.*]], label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3]] ; CHECK: bb2: @@ -33,8 +33,8 @@ bb3: define i32 @test2(i32 %a, i32 %b) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: br i1 [[COND]], label [[BB3:%.*]], label [[BB1:%.*]] +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], 
label [[BB3:%.*]], label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB2:%.*]] ; CHECK: bb2: @@ -203,8 +203,8 @@ bb3: define i32 @test8(i32 %a, i32 %b) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: br i1 [[COND]], label [[BB3:%.*]], label [[BB1:%.*]] +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[BB3:%.*]], label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[SUB1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[A]], i32 [[B]]) ; CHECK-NEXT: [[C1:%.*]] = extractvalue { i32, i1 } [[SUB1]], 1 @@ -261,6 +261,36 @@ bb3: ret i32 0 } +define i32 @test9_logical(i32 %a, i32 %b, i1 %cond2) { +; CHECK-LABEL: @test9_logical( +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[COND]], [[COND2:%.*]] +; CHECK-NEXT: br i1 [[AND]], label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[SUB1:%.*]] = sub nuw i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[SUB1]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; + %cond = icmp ugt i32 %a, %b + %and = select i1 %cond, i1 %cond2, i1 false + br i1 %and, label %bb1, label %bb3 + +bb1: + %sub1 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %r1 = extractvalue { i32, i1 } %sub1, 0 + %c1 = extractvalue { i32, i1 } %sub1, 1 + br i1 %c1, label %bb3, label %bb2 + +bb2: + ret i32 %r1 + +bb3: + ret i32 0 +} + define i32 @test10(i32 %a, i32 %b, i1 %cond2) { ; CHECK-LABEL: @test10( ; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] @@ -293,6 +323,38 @@ bb3: ret i32 0 } +define i32 @test10_logical(i32 %a, i32 %b, i1 %cond2) { +; CHECK-LABEL: @test10_logical( +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[COND]], [[COND2:%.*]] +; CHECK-NEXT: br i1 [[AND]], label [[BB3:%.*]], label 
[[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[SUB1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: [[C1:%.*]] = extractvalue { i32, i1 } [[SUB1]], 1 +; CHECK-NEXT: br i1 [[C1]], label [[BB3]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[R1:%.*]] = extractvalue { i32, i1 } [[SUB1]], 0 +; CHECK-NEXT: ret i32 [[R1]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; + %cond = icmp ugt i32 %a, %b + %and = select i1 %cond, i1 %cond2, i1 false + br i1 %and, label %bb3, label %bb1 + +bb1: + %sub1 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %r1 = extractvalue { i32, i1 } %sub1, 0 + %c1 = extractvalue { i32, i1 } %sub1, 1 + br i1 %c1, label %bb3, label %bb2 + +bb2: + ret i32 %r1 + +bb3: + ret i32 0 +} + define i32 @test11(i32 %a, i32 %b, i1 %cond2) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] @@ -325,6 +387,38 @@ bb3: ret i32 0 } +define i32 @test11_logical(i32 %a, i32 %b, i1 %cond2) { +; CHECK-LABEL: @test11_logical( +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 [[COND]], [[COND2:%.*]] +; CHECK-NEXT: br i1 [[OR]], label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[SUB1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: [[C1:%.*]] = extractvalue { i32, i1 } [[SUB1]], 1 +; CHECK-NEXT: br i1 [[C1]], label [[BB3]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[R1:%.*]] = extractvalue { i32, i1 } [[SUB1]], 0 +; CHECK-NEXT: ret i32 [[R1]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; + %cond = icmp ugt i32 %a, %b + %or = select i1 %cond, i1 true, i1 %cond2 + br i1 %or, label %bb1, label %bb3 + +bb1: + %sub1 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %r1 = extractvalue { i32, i1 } %sub1, 0 + %c1 = extractvalue { i32, i1 } %sub1, 1 + br i1 %c1, label %bb3, label %bb2 + +bb2: + ret i32 %r1 + +bb3: + ret i32 0 +} + define i32 @test12(i32 %a, i32 
%b, i1 %cond2) { ; CHECK-LABEL: @test12( ; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] @@ -356,3 +450,35 @@ bb2: bb3: ret i32 0 } + +define i32 @test12_logical(i32 %a, i32 %b, i1 %cond2) { +; CHECK-LABEL: @test12_logical( +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 [[COND]], [[COND2:%.*]] +; CHECK-NEXT: br i1 [[OR]], label [[BB3:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[SUB1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: [[C1:%.*]] = extractvalue { i32, i1 } [[SUB1]], 1 +; CHECK-NEXT: br i1 [[C1]], label [[BB3]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[R1:%.*]] = extractvalue { i32, i1 } [[SUB1]], 0 +; CHECK-NEXT: ret i32 [[R1]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; + %cond = icmp ugt i32 %a, %b + %or = select i1 %cond, i1 true, i1 %cond2 + br i1 %or, label %bb3, label %bb1 + +bb1: + %sub1 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %r1 = extractvalue { i32, i1 } %sub1, 0 + %c1 = extractvalue { i32, i1 } %sub1, 1 + br i1 %c1, label %bb3, label %bb2 + +bb2: + ret i32 %r1 + +bb3: + ret i32 0 +} diff --git a/llvm/test/Transforms/InstCombine/widenable-conditions.ll b/llvm/test/Transforms/InstCombine/widenable-conditions.ll index 4f36647241f884..31aa98ff998b09 100644 --- a/llvm/test/Transforms/InstCombine/widenable-conditions.ll +++ b/llvm/test/Transforms/InstCombine/widenable-conditions.ll @@ -17,6 +17,19 @@ define i1 @test1(i1 %a, i1 %b) { ret i1 %and } +define i1 @test1_logical(i1 %a, i1 %b) { +; CHECK-LABEL: @test1_logical( +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[LHS:%.*]] = and i1 [[WC]], [[B:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[LHS]], [[A:%.*]] +; CHECK-NEXT: ret i1 [[AND]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %lhs = select i1 %b, i1 %wc, i1 false + %and = select i1 %lhs, i1 %a, i1 false + ret i1 %and +} + ; 
Negative test - profitability of dropping WC from first use unclear define i1 @test1b(i1 %a, i1 %b) { ; CHECK-LABEL: @test1b( @@ -33,6 +46,21 @@ define i1 @test1b(i1 %a, i1 %b) { ret i1 %and } +define i1 @test1b_logical(i1 %a, i1 %b) { +; CHECK-LABEL: @test1b_logical( +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[LHS:%.*]] = and i1 [[WC]], [[B:%.*]] +; CHECK-NEXT: call void @use(i1 [[LHS]]) +; CHECK-NEXT: [[AND:%.*]] = and i1 [[LHS]], [[A:%.*]] +; CHECK-NEXT: ret i1 [[AND]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %lhs = select i1 %b, i1 %wc, i1 false + call void @use(i1 %lhs) + %and = select i1 %lhs, i1 %a, i1 false + ret i1 %and +} + ; multiple uses of A, B, WC doesn't change result define i1 @test1c(i1 %a, i1 %b) { ; CHECK-LABEL: @test1c( @@ -53,6 +81,25 @@ define i1 @test1c(i1 %a, i1 %b) { ret i1 %and } +define i1 @test1c_logical(i1 %a, i1 %b) { +; CHECK-LABEL: @test1c_logical( +; CHECK-NEXT: call void @use(i1 [[A:%.*]]) +; CHECK-NEXT: call void @use(i1 [[B:%.*]]) +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: call void @use(i1 [[WC]]) +; CHECK-NEXT: [[LHS:%.*]] = and i1 [[WC]], [[B]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[LHS]], [[A]] +; CHECK-NEXT: ret i1 [[AND]] +; + call void @use(i1 %a) + call void @use(i1 %b) + %wc = call i1 @llvm.experimental.widenable.condition() + call void @use(i1 %wc) + %lhs = select i1 %b, i1 %wc, i1 false + %and = select i1 %lhs, i1 %a, i1 false + ret i1 %and +} + define i1 @test2(i1 %a, i1 %b) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() @@ -66,6 +113,19 @@ define i1 @test2(i1 %a, i1 %b) { ret i1 %and } +define i1 @test2_logical(i1 %a, i1 %b) { +; CHECK-LABEL: @test2_logical( +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[LHS:%.*]] = and i1 [[WC]], [[B:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[LHS]], [[A:%.*]] +; 
CHECK-NEXT: ret i1 [[AND]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %lhs = select i1 %wc, i1 %b, i1 false + %and = select i1 %lhs, i1 %a, i1 false + ret i1 %and +} + ; To test the rhs side, an instruction on lhs to prevent complexity ; canonicalization reducing to above. define i1 @test3(i1 %a, i1 %b, i1 %c) { @@ -83,6 +143,21 @@ define i1 @test3(i1 %a, i1 %b, i1 %c) { ret i1 %and } +define i1 @test3_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @test3_logical( +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[LHS:%.*]] = and i1 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = and i1 [[WC]], [[C:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i1 [[AND]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %lhs = select i1 %a, i1 %b, i1 false + %rhs = select i1 %c, i1 %wc, i1 false + %and = select i1 %lhs, i1 %rhs, i1 false + ret i1 %and +} + define i1 @test4(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() @@ -98,6 +173,21 @@ define i1 @test4(i1 %a, i1 %b, i1 %c) { ret i1 %and } +define i1 @test4_logical(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @test4_logical( +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[LHS:%.*]] = and i1 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[RHS:%.*]] = and i1 [[WC]], [[C:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i1 [[AND]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %lhs = select i1 %a, i1 %b, i1 false + %rhs = select i1 %wc, i1 %c, i1 false + %and = select i1 %lhs, i1 %rhs, i1 false + ret i1 %and +} + define i1 @test5(i1 %a, i1 %b) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() @@ -108,6 +198,16 @@ define i1 @test5(i1 %a, i1 %b) { ret i1 %and } +define i1 @test5_logical(i1 %a, i1 %b) { +; CHECK-LABEL: @test5_logical( 
+; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: ret i1 [[WC]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %and = select i1 %wc, i1 %wc, i1 false + ret i1 %and +} + define i1 @test6(i1 %a, i1 %b) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() @@ -121,6 +221,19 @@ define i1 @test6(i1 %a, i1 %b) { ret i1 %and } +define i1 @test6_logical(i1 %a, i1 %b) { +; CHECK-LABEL: @test6_logical( +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[WC2:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[AND:%.*]] = and i1 [[WC]], [[WC2]] +; CHECK-NEXT: ret i1 [[AND]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %wc2 = call i1 @llvm.experimental.widenable.condition() + %and = select i1 %wc, i1 %wc2, i1 false + ret i1 %and +} + define i1 @test7(i1 %a, i1 %b) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() @@ -136,6 +249,21 @@ define i1 @test7(i1 %a, i1 %b) { ret i1 %and } +define i1 @test7_logical(i1 %a, i1 %b) { +; CHECK-LABEL: @test7_logical( +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: call void @use(i1 [[WC]]) +; CHECK-NEXT: [[WC2:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[AND:%.*]] = and i1 [[WC]], [[WC2]] +; CHECK-NEXT: ret i1 [[AND]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + call void @use(i1 %wc) + %wc2 = call i1 @llvm.experimental.widenable.condition() + %and = select i1 %wc, i1 %wc2, i1 false + ret i1 %and +} + define i1 @test8(i1 %a, i1 %b) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() @@ -151,6 +279,21 @@ define i1 @test8(i1 %a, i1 %b) { ret i1 %and } +define i1 @test8_logical(i1 %a, i1 %b) { +; CHECK-LABEL: @test8_logical( +; CHECK-NEXT: [[WC:%.*]] = call i1 
@llvm.experimental.widenable.condition() +; CHECK-NEXT: [[WC2:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: call void @use(i1 [[WC2]]) +; CHECK-NEXT: [[AND:%.*]] = and i1 [[WC]], [[WC2]] +; CHECK-NEXT: ret i1 [[AND]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %wc2 = call i1 @llvm.experimental.widenable.condition() + call void @use(i1 %wc2) + %and = select i1 %wc, i1 %wc2, i1 false + ret i1 %and +} + declare void @use(i1) declare i1 @llvm.experimental.widenable.condition() diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll index 8223479c81348a..a77aa7ac7ebd81 100644 --- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll +++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll @@ -20,6 +20,23 @@ define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) { ret i8 %zext } +define i8 @zext_or_icmp_icmp_logical(i8 %a, i8 %b) { +; CHECK-LABEL: @zext_or_icmp_icmp_logical( +; CHECK-NEXT: [[MASK:%.*]] = and i8 [[A:%.*]], 1 +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i8 [[B:%.*]], 0 +; CHECK-NEXT: [[TOBOOL22:%.*]] = zext i1 [[TOBOOL2]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[MASK]], 1 +; CHECK-NEXT: [[ZEXT3:%.*]] = or i8 [[TMP1]], [[TOBOOL22]] +; CHECK-NEXT: ret i8 [[ZEXT3]] +; + %mask = and i8 %a, 1 + %toBool1 = icmp eq i8 %mask, 0 + %toBool2 = icmp eq i8 %b, 0 + %bothCond = select i1 %toBool1, i1 true, i1 %toBool2 + %zext = zext i1 %bothCond to i8 + ret i8 %zext +} + ; Here, widening the or from i1 to i32 and removing one of the icmps would ; widen an undef value (created by the out-of-range shift), increasing the ; range of valid values for the return, so we can't do it. 
@@ -56,3 +73,36 @@ block2: %conv2 = zext i1 %cmp1 to i32 ret i32 %conv2 } + +define i32 @dont_widen_undef_logical() { +; CHECK-LABEL: @dont_widen_undef_logical( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BLOCK2:%.*]] +; CHECK: block1: +; CHECK-NEXT: br label [[BLOCK2]] +; CHECK: block2: +; CHECK-NEXT: [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ true, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[M_011:%.*]] = phi i32 [ 0, [[BLOCK1]] ], [ 33, [[ENTRY]] ] +; CHECK-NEXT: [[M_1_OP:%.*]] = lshr i32 1, [[M_011]] +; CHECK-NEXT: [[SEXT_MASK:%.*]] = and i32 [[M_1_OP]], 65535 +; CHECK-NEXT: [[CMP115:%.*]] = icmp ne i32 [[SEXT_MASK]], 0 +; CHECK-NEXT: [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]] +; CHECK-NEXT: [[CONV2:%.*]] = zext i1 [[CMP1]] to i32 +; CHECK-NEXT: ret i32 [[CONV2]] +; +entry: + br label %block2 + +block1: + br label %block2 + +block2: + %m.011 = phi i32 [ 33, %entry ], [ 0, %block1 ] + %cmp.i = icmp ugt i32 %m.011, 1 + %m.1.op = lshr i32 1, %m.011 + %sext.mask = and i32 %m.1.op, 65535 + %cmp115 = icmp ne i32 %sext.mask, 0 + %cmp1 = select i1 %cmp.i, i1 true, i1 %cmp115 + %conv2 = zext i1 %cmp1 to i32 + ret i32 %conv2 +} From caafdf07bbccbe89219539e2b56043c2a98358f1 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 12 Jan 2021 12:54:07 -0800 Subject: [PATCH 67/86] [LV] Weaken spuriously strong assert in LoopVersioning LoopVectorize uses some utilities on LoopVersioning, but doesn't actually use it for, you know, versioning. As a result, the precondition LoopVersioning expects is too strong for this user. At the moment, LoopVectorize supports any loop with a unique exit block, so check the same precondition here. Really, the whole class structure here is a mess. We should separate the actual versioning from the metadata updates, but that's a bigger problem. 
--- llvm/lib/Transforms/Utils/LoopVersioning.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index b54aee35d56d56..599bd1feb2bc32 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -44,7 +44,7 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, AliasChecks(Checks.begin(), Checks.end()), Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT), SE(SE) { - assert(L->getExitBlock() && "No single exit block"); + assert(L->getUniqueExitBlock() && "No single exit block"); } void LoopVersioning::versionLoop( From 46507a96fc13146f73e5915a008055c5a59191c2 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 12 Jan 2021 13:45:32 -0500 Subject: [PATCH 68/86] [SLP] reduce code duplication while matching reductions; NFC --- .../Transforms/Vectorize/SLPVectorizer.cpp | 69 +++++++++---------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index bd673d112b3ae1..ff22572782e22c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6857,49 +6857,48 @@ class HorizontalReduction { // Visit left or right. Value *NextV = TreeN->getOperand(EdgeToVisit); - if (NextV != Phi) { - auto *I = dyn_cast(NextV); - OpData = getOperationData(I); - // Continue analysis if the next operand is a reduction operation or - // (possibly) a reduced value. If the reduced value opcode is not set, - // the first met operation != reduction operation is considered as the - // reduced value class. - const bool IsRdxInst = OpData == RdxTreeInst; - if (I && (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) { - // Only handle trees in the current basic block. 
- if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; - } + auto *I = dyn_cast(NextV); + OpData = getOperationData(I); + // Continue analysis if the next operand is a reduction operation or + // (possibly) a reduced value. If the reduced value opcode is not set, + // the first met operation != reduction operation is considered as the + // reduced value class. + const bool IsRdxInst = OpData == RdxTreeInst; + if (I && I != Phi && + (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) { + // Only handle trees in the current basic block. + if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) { + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); + continue; + } - // Each tree node needs to have minimal number of users except for the - // ultimate reduction. - if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; - } + // Each tree node needs to have minimal number of users except for the + // ultimate reduction. + if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); + continue; + } - if (IsRdxInst) { - // We need to be able to reassociate the reduction operations. - if (!OpData.isAssociative(I)) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; - } - } else if (RdxLeafVal && RdxLeafVal != OpData) { - // Make sure that the opcodes of the operations that we are going to - // reduce match. + if (IsRdxInst) { + // We need to be able to reassociate the reduction operations. + if (!OpData.isAssociative(I)) { // I is an extra argument for TreeN (its parent operation). 
markExtraArg(Stack.back(), I); continue; - } else if (!RdxLeafVal) { - RdxLeafVal = OpData; } - Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); + } else if (RdxLeafVal && RdxLeafVal != OpData) { + // Make sure that the opcodes of the operations that we are going to + // reduce match. + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); continue; + } else if (!RdxLeafVal) { + RdxLeafVal = OpData; } + Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); + continue; } // NextV is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), NextV); From 554be30a42802d66807f93e4671a518c1c04e0f8 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 12 Jan 2021 13:53:18 -0500 Subject: [PATCH 69/86] [SLP] reduce code duplication in processing reductions; NFC --- .../Transforms/Vectorize/SLPVectorizer.cpp | 39 +++++++------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ff22572782e22c..04bdc74c7879a6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6867,38 +6867,29 @@ class HorizontalReduction { if (I && I != Phi && (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) { // Only handle trees in the current basic block. - if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; - } - // Each tree node needs to have minimal number of users except for the // ultimate reduction. - if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; - } - - if (IsRdxInst) { - // We need to be able to reassociate the reduction operations. 
- if (!OpData.isAssociative(I)) { + if (RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) && + RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { + if (IsRdxInst) { + // We need to be able to reassociate the reduction operations. + if (!OpData.isAssociative(I)) { + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); + continue; + } + } else if (RdxLeafVal && RdxLeafVal != OpData) { + // Make sure that the opcodes of the operations that we are going to + // reduce match. // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; + } else if (!RdxLeafVal) { + RdxLeafVal = OpData; } - } else if (RdxLeafVal && RdxLeafVal != OpData) { - // Make sure that the opcodes of the operations that we are going to - // reduce match. - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); + Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); continue; - } else if (!RdxLeafVal) { - RdxLeafVal = OpData; } - Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); - continue; } // NextV is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), NextV); From 92fb5c49e8aa53ac97fa2fb1a891a4d7ccfd75c5 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 12 Jan 2021 14:55:09 -0500 Subject: [PATCH 70/86] [SLP] rename variable to improve readability; NFC The OperationData in the 2nd block (visiting the operands) is completely independent of the 1st block. 
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 04bdc74c7879a6..1ef762c9dfa777 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6826,7 +6826,7 @@ class HorizontalReduction { while (!Stack.empty()) { Instruction *TreeN = Stack.back().first; unsigned EdgeToVisit = Stack.back().second++; - OperationData OpData = getOperationData(TreeN); + const OperationData OpData = getOperationData(TreeN); bool IsReducedValue = OpData != RdxTreeInst; // Postorder vist. @@ -6858,14 +6858,14 @@ class HorizontalReduction { // Visit left or right. Value *NextV = TreeN->getOperand(EdgeToVisit); auto *I = dyn_cast(NextV); - OpData = getOperationData(I); + const OperationData EdgeOpData = getOperationData(I); // Continue analysis if the next operand is a reduction operation or // (possibly) a reduced value. If the reduced value opcode is not set, // the first met operation != reduction operation is considered as the // reduced value class. - const bool IsRdxInst = OpData == RdxTreeInst; + const bool IsRdxInst = EdgeOpData == RdxTreeInst; if (I && I != Phi && - (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) { + (!RdxLeafVal || EdgeOpData == RdxLeafVal || IsRdxInst)) { // Only handle trees in the current basic block. // Each tree node needs to have minimal number of users except for the // ultimate reduction. @@ -6873,21 +6873,21 @@ class HorizontalReduction { RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { if (IsRdxInst) { // We need to be able to reassociate the reduction operations. - if (!OpData.isAssociative(I)) { + if (!EdgeOpData.isAssociative(I)) { // I is an extra argument for TreeN (its parent operation). 
markExtraArg(Stack.back(), I); continue; } - } else if (RdxLeafVal && RdxLeafVal != OpData) { + } else if (RdxLeafVal && RdxLeafVal != EdgeOpData) { // Make sure that the opcodes of the operations that we are going to // reduce match. // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; } else if (!RdxLeafVal) { - RdxLeafVal = OpData; + RdxLeafVal = EdgeOpData; } - Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); + Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex())); continue; } } From 9e7895a8682ce3ad98c006955d573d5f2fded4f6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 12 Jan 2021 15:07:01 -0500 Subject: [PATCH 71/86] [SLP] reduce code duplication while processing reductions; NFC --- .../Transforms/Vectorize/SLPVectorizer.cpp | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1ef762c9dfa777..403170447f5a89 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6863,33 +6863,32 @@ class HorizontalReduction { // (possibly) a reduced value. If the reduced value opcode is not set, // the first met operation != reduction operation is considered as the // reduced value class. + // Only handle trees in the current basic block. + // Each tree node needs to have minimal number of users except for the + // ultimate reduction. const bool IsRdxInst = EdgeOpData == RdxTreeInst; - if (I && I != Phi && + if (I && I != Phi && I != B && + RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) && + RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && (!RdxLeafVal || EdgeOpData == RdxLeafVal || IsRdxInst)) { - // Only handle trees in the current basic block. - // Each tree node needs to have minimal number of users except for the - // ultimate reduction. 
- if (RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) && - RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { - if (IsRdxInst) { - // We need to be able to reassociate the reduction operations. - if (!EdgeOpData.isAssociative(I)) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; - } - } else if (RdxLeafVal && RdxLeafVal != EdgeOpData) { - // Make sure that the opcodes of the operations that we are going to - // reduce match. + if (IsRdxInst) { + // We need to be able to reassociate the reduction operations. + if (!EdgeOpData.isAssociative(I)) { // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; - } else if (!RdxLeafVal) { - RdxLeafVal = EdgeOpData; } - Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex())); + } else if (RdxLeafVal && RdxLeafVal != EdgeOpData) { + // Make sure that the opcodes of the operations that we are going to + // reduce match. + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); continue; + } else if (!RdxLeafVal) { + RdxLeafVal = EdgeOpData; } + Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex())); + continue; } // NextV is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), NextV); From 7583ae48a3c37a78e57106e4feff6045aaa45584 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 12 Jan 2021 13:08:58 -0800 Subject: [PATCH 72/86] [RISCV] Add double test cases to vfmerge-rv32.ll. 
NFC --- llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll | 218 +++++++++++++++++++- 1 file changed, 217 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll index 5a7262c348c5c0..21a2b73f74ba3a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ ; RUN: --riscv-no-aliases < %s | FileCheck %s declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( , @@ -440,6 +440,166 @@ entry: ret %a } +declare @llvm.riscv.vfmerge.nxv1f64.nxv1f64( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f64.nxv1f64( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f64.f64( + , + double, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64( %0, double %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f64.f64( + %0, + double %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f64.nxv2f64( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f64.nxv2f64( + %0, + %1, + %2, + i32 %3) + + ret %a 
+} + +declare @llvm.riscv.vfmerge.nxv2f64.f64( + , + double, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64( %0, double %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f64.f64( + %0, + double %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f64.nxv4f64( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f64.nxv4f64( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f64.f64( + , + double, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64( %0, double %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f64.f64( + %0, + double %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f64.nxv8f64( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f64.nxv8f64( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f64.f64( + , + double, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64( %0, double %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8 +; 
CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f64.f64( + %0, + double %1, + %2, + i32 %3) + + ret %a +} + define @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16( %0, %1, i32 %2) nounwind { entry: ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16 @@ -593,3 +753,59 @@ entry: ret %a } + +define @intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv1f64.f64( + %0, + double zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv2f64.f64( + %0, + double zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv4f64.f64( + %0, + double zeroinitializer, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 0, v0 + %a = call @llvm.riscv.vfmerge.nxv8f64.f64( + %0, + double zeroinitializer, + %1, + i32 %2) + + ret %a +} From e15f3ddcae65525176d1f152effb88cd3c6441a3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 12 Jan 2021 22:15:54 +0100 Subject: [PATCH 73/86] [InstCombine] Add tests for logical and/or poison implication (NFC) These 
tests cover some cases where we can fold select to and/or based on poison implication logic. --- .../Transforms/InstCombine/select-and-or.ll | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/select-and-or.ll b/llvm/test/Transforms/InstCombine/select-and-or.ll index 5fab7cdb94a628..59fa170b73d454 100644 --- a/llvm/test/Transforms/InstCombine/select-and-or.ll +++ b/llvm/test/Transforms/InstCombine/select-and-or.ll @@ -85,3 +85,99 @@ define i1 @logical_or_not_cond_reuse(i1 %a, i1 %b) { %res = select i1 %a, i1 %a.not, i1 %b ret i1 %res } + +; Safe to convert to or due to poison implication. +define i1 @logical_or_implies(i32 %x) { +; CHECK-LABEL: @logical_or_implies( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X]], 42 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] +; CHECK-NEXT: ret i1 [[RES]] +; + %c1 = icmp eq i32 %x, 0 + %c2 = icmp eq i32 %x, 42 + %res = select i1 %c1, i1 true, i1 %c2 + ret i1 %res +} + +; Will fold after conversion to or. +define i1 @logical_or_implies_folds(i32 %x) { +; CHECK-LABEL: @logical_or_implies_folds( +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[X]], -1 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] +; CHECK-NEXT: ret i1 [[RES]] +; + %c1 = icmp slt i32 %x, 0 + %c2 = icmp sge i32 %x, 0 + %res = select i1 %c1, i1 true, i1 %c2 + ret i1 %res +} + +; Safe to convert to and due to poison implication. +define i1 @logical_and_implies(i32 %x) { +; CHECK-LABEL: @logical_and_implies( +; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[X:%.*]], 0 +; CHECK-NEXT: [[C2:%.*]] = icmp ne i32 [[X]], 42 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false +; CHECK-NEXT: ret i1 [[RES]] +; + %c1 = icmp ne i32 %x, 0 + %c2 = icmp ne i32 %x, 42 + %res = select i1 %c1, i1 %c2, i1 false + ret i1 %res +} + +; Will fold after conversion to and. 
+define i1 @logical_and_implies_folds(i32 %x) { +; CHECK-LABEL: @logical_and_implies_folds( +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[X:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false +; CHECK-NEXT: ret i1 [[RES]] +; + %c1 = icmp ugt i32 %x, 42 + %c2 = icmp ne i32 %x, 0 + %res = select i1 %c1, i1 %c2, i1 false + ret i1 %res +} + +; Noundef on condition has no effect. +define i1 @logical_or_noundef_a(i1 noundef %a, i1 %b) { +; CHECK-LABEL: @logical_or_noundef_a( +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[B:%.*]] +; CHECK-NEXT: ret i1 [[RES]] +; + %res = select i1 %a, i1 true, i1 %b + ret i1 %res +} + +; Noundef on false value allows conversion to or. +define i1 @logical_or_noundef_b(i1 %a, i1 noundef %b) { +; CHECK-LABEL: @logical_or_noundef_b( +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[B:%.*]] +; CHECK-NEXT: ret i1 [[RES]] +; + %res = select i1 %a, i1 true, i1 %b + ret i1 %res +} + +; Noundef on condition has no effect. +define i1 @logical_and_noundef_a(i1 noundef %a, i1 %b) { +; CHECK-LABEL: @logical_and_noundef_a( +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false +; CHECK-NEXT: ret i1 [[RES]] +; + %res = select i1 %a, i1 %b, i1 false + ret i1 %res +} + +; Noundef on false value allows conversion to and. +define i1 @logical_and_noundef_b(i1 %a, i1 noundef %b) { +; CHECK-LABEL: @logical_and_noundef_b( +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false +; CHECK-NEXT: ret i1 [[RES]] +; + %res = select i1 %a, i1 %b, i1 false + ret i1 %res +} From 71ed4b6ce57d8843ef705af8f98305976a8f107a Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Tue, 12 Jan 2021 21:22:34 +0000 Subject: [PATCH 74/86] [RISCV] Legalize select when Zbt extension available The custom expansion of select operations in the RISC-V backend interferes with the matching of cmov instructions. 
Legalizing select when the Zbt extension is available solves that problem. Reviewed By: lenary, craig.topper Differential Revision: https://reviews.llvm.org/D93767 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +- llvm/lib/Target/RISCV/RISCVInstrInfoB.td | 18 +- llvm/test/CodeGen/RISCV/rv32Zbb.ll | 137 ++---- llvm/test/CodeGen/RISCV/rv32Zbbp.ll | 138 +++--- llvm/test/CodeGen/RISCV/rv32Zbs.ll | 52 +-- llvm/test/CodeGen/RISCV/rv32Zbt.ll | 480 +++++++------------- llvm/test/CodeGen/RISCV/rv64Zbt.ll | 112 ++--- 7 files changed, 354 insertions(+), 587 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 03db9911c86718..73bc83b558adcd 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -162,7 +162,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BR_CC, XLenVT, Expand); - setOperationAction(ISD::SELECT, XLenVT, Custom); setOperationAction(ISD::SELECT_CC, XLenVT, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); @@ -249,11 +248,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasStdExtZbt()) { setOperationAction(ISD::FSHL, XLenVT, Legal); setOperationAction(ISD::FSHR, XLenVT, Legal); + setOperationAction(ISD::SELECT, XLenVT, Legal); if (Subtarget.is64Bit()) { setOperationAction(ISD::FSHL, MVT::i32, Custom); setOperationAction(ISD::FSHR, MVT::i32, Custom); } + } else { + setOperationAction(ISD::SELECT, XLenVT, Custom); } ISD::CondCode FPCCToExpand[] = { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td index ce6cb6ba82ce9a..47740308518f43 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -797,7 +797,23 @@ def : Pat<(rotl (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8) let 
Predicates = [HasStdExtZbt] in { def : Pat<(or (and (not GPR:$rs2), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)), (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(riscv_selectcc GPR:$rs2, (XLenVT 0), (XLenVT 17), GPR:$rs3, GPR:$rs1), +def : Pat<(select (XLenVT (setne GPR:$rs2, 0)), GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq GPR:$rs2, 0)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq GPR:$x, GPR:$y)), GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setne GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setuge GPR:$x, GPR:$y)), GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setule GPR:$y, GPR:$x)), GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setge GPR:$x, GPR:$y)), GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setle GPR:$y, GPR:$x)), GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select GPR:$rs2, GPR:$rs3, GPR:$rs1), (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; } // Predicates = [HasStdExtZbt] diff --git a/llvm/test/CodeGen/RISCV/rv32Zbb.ll b/llvm/test/CodeGen/RISCV/rv32Zbb.ll index b95fcd5f523258..90ea5629aae6aa 100644 --- a/llvm/test/CodeGen/RISCV/rv32Zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32Zbb.ll @@ -60,14 +60,7 @@ define i64 @slo_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: slo_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: addi a3, a2, -32 ; RV32IB-NEXT: not a0, a0 -; RV32IB-NEXT: bltz a3, .LBB1_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: mv a2, zero -; RV32IB-NEXT: sll a1, a0, a3 -; RV32IB-NEXT: j .LBB1_3 -; RV32IB-NEXT: .LBB1_2: ; RV32IB-NEXT: not a1, a1 ; RV32IB-NEXT: sll a1, a1, a2 ; RV32IB-NEXT: addi a3, zero, 31 @@ -75,10 +68,15 @@ define i64 
@slo_i64(i64 %a, i64 %b) nounwind { ; RV32IB-NEXT: srli a4, a0, 1 ; RV32IB-NEXT: srl a3, a4, a3 ; RV32IB-NEXT: or a1, a1, a3 -; RV32IB-NEXT: sll a2, a0, a2 -; RV32IB-NEXT: .LBB1_3: +; RV32IB-NEXT: addi a3, a2, -32 +; RV32IB-NEXT: sll a4, a0, a3 +; RV32IB-NEXT: slti a5, a3, 0 +; RV32IB-NEXT: cmov a1, a5, a4, a1 +; RV32IB-NEXT: sll a0, a0, a2 +; RV32IB-NEXT: srai a2, a3, 31 +; RV32IB-NEXT: and a0, a2, a0 ; RV32IB-NEXT: not a1, a1 -; RV32IB-NEXT: not a0, a2 +; RV32IB-NEXT: not a0, a0 ; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: slo_i64: @@ -163,14 +161,7 @@ define i64 @sro_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: sro_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: addi a3, a2, -32 ; RV32IB-NEXT: not a1, a1 -; RV32IB-NEXT: bltz a3, .LBB3_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: mv a2, zero -; RV32IB-NEXT: srl a0, a1, a3 -; RV32IB-NEXT: j .LBB3_3 -; RV32IB-NEXT: .LBB3_2: ; RV32IB-NEXT: not a0, a0 ; RV32IB-NEXT: srl a0, a0, a2 ; RV32IB-NEXT: addi a3, zero, 31 @@ -178,10 +169,15 @@ define i64 @sro_i64(i64 %a, i64 %b) nounwind { ; RV32IB-NEXT: slli a4, a1, 1 ; RV32IB-NEXT: sll a3, a4, a3 ; RV32IB-NEXT: or a0, a0, a3 -; RV32IB-NEXT: srl a2, a1, a2 -; RV32IB-NEXT: .LBB3_3: +; RV32IB-NEXT: addi a3, a2, -32 +; RV32IB-NEXT: srl a4, a1, a3 +; RV32IB-NEXT: slti a5, a3, 0 +; RV32IB-NEXT: cmov a0, a5, a4, a0 +; RV32IB-NEXT: srl a1, a1, a2 +; RV32IB-NEXT: srai a2, a3, 31 +; RV32IB-NEXT: and a1, a2, a1 ; RV32IB-NEXT: not a0, a0 -; RV32IB-NEXT: not a1, a2 +; RV32IB-NEXT: not a1, a1 ; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: sro_i64: @@ -466,14 +462,10 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; ; RV32IB-LABEL: ctlz_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: bnez a1, .LBB9_2 -; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: clz a2, a1 ; RV32IB-NEXT: clz a0, a0 ; RV32IB-NEXT: addi a0, a0, 32 -; RV32IB-NEXT: mv a1, zero -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB9_2: -; RV32IB-NEXT: clz a0, a1 +; RV32IB-NEXT: cmov a0, a1, a2, a0 ; RV32IB-NEXT: mv a1, zero ; RV32IB-NEXT: ret ; @@ -623,14 +615,10 @@ define i64 
@cttz_i64(i64 %a) nounwind { ; ; RV32IB-LABEL: cttz_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: bnez a0, .LBB11_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: ctz a0, a1 -; RV32IB-NEXT: addi a0, a0, 32 -; RV32IB-NEXT: mv a1, zero -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB11_2: -; RV32IB-NEXT: ctz a0, a0 +; RV32IB-NEXT: ctz a2, a0 +; RV32IB-NEXT: ctz a1, a1 +; RV32IB-NEXT: addi a1, a1, 32 +; RV32IB-NEXT: cmov a0, a0, a2, a1 ; RV32IB-NEXT: mv a1, zero ; RV32IB-NEXT: ret ; @@ -910,18 +898,11 @@ define i64 @min_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: min_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: mv a4, a0 -; RV32IB-NEXT: bge a1, a3, .LBB19_3 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: beq a1, a3, .LBB19_4 -; RV32IB-NEXT: .LBB19_2: -; RV32IB-NEXT: min a1, a1, a3 -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB19_3: -; RV32IB-NEXT: mv a0, a2 -; RV32IB-NEXT: bne a1, a3, .LBB19_2 -; RV32IB-NEXT: .LBB19_4: -; RV32IB-NEXT: minu a0, a4, a2 +; RV32IB-NEXT: slt a4, a1, a3 +; RV32IB-NEXT: cmov a4, a4, a2, a0 +; RV32IB-NEXT: xor a5, a1, a3 +; RV32IB-NEXT: minu a0, a0, a2 +; RV32IB-NEXT: cmov a0, a5, a0, a4 ; RV32IB-NEXT: min a1, a1, a3 ; RV32IB-NEXT: ret ; @@ -993,18 +974,11 @@ define i64 @max_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: max_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: mv a4, a0 -; RV32IB-NEXT: bge a3, a1, .LBB21_3 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: beq a1, a3, .LBB21_4 -; RV32IB-NEXT: .LBB21_2: -; RV32IB-NEXT: max a1, a1, a3 -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB21_3: -; RV32IB-NEXT: mv a0, a2 -; RV32IB-NEXT: bne a1, a3, .LBB21_2 -; RV32IB-NEXT: .LBB21_4: -; RV32IB-NEXT: maxu a0, a4, a2 +; RV32IB-NEXT: slt a4, a3, a1 +; RV32IB-NEXT: cmov a4, a4, a2, a0 +; RV32IB-NEXT: xor a5, a1, a3 +; RV32IB-NEXT: maxu a0, a0, a2 +; RV32IB-NEXT: cmov a0, a5, a0, a4 ; RV32IB-NEXT: max a1, a1, a3 ; RV32IB-NEXT: ret ; @@ -1076,18 +1050,11 @@ define i64 @minu_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: minu_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: mv a4, a0 -; RV32IB-NEXT: bgeu a1, 
a3, .LBB23_3 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: beq a1, a3, .LBB23_4 -; RV32IB-NEXT: .LBB23_2: -; RV32IB-NEXT: minu a1, a1, a3 -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB23_3: -; RV32IB-NEXT: mv a0, a2 -; RV32IB-NEXT: bne a1, a3, .LBB23_2 -; RV32IB-NEXT: .LBB23_4: -; RV32IB-NEXT: minu a0, a4, a2 +; RV32IB-NEXT: sltu a4, a1, a3 +; RV32IB-NEXT: cmov a4, a4, a2, a0 +; RV32IB-NEXT: xor a5, a1, a3 +; RV32IB-NEXT: minu a0, a0, a2 +; RV32IB-NEXT: cmov a0, a5, a0, a4 ; RV32IB-NEXT: minu a1, a1, a3 ; RV32IB-NEXT: ret ; @@ -1159,18 +1126,11 @@ define i64 @maxu_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: maxu_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: mv a4, a0 -; RV32IB-NEXT: bgeu a3, a1, .LBB25_3 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: beq a1, a3, .LBB25_4 -; RV32IB-NEXT: .LBB25_2: -; RV32IB-NEXT: maxu a1, a1, a3 -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB25_3: -; RV32IB-NEXT: mv a0, a2 -; RV32IB-NEXT: bne a1, a3, .LBB25_2 -; RV32IB-NEXT: .LBB25_4: -; RV32IB-NEXT: maxu a0, a4, a2 +; RV32IB-NEXT: sltu a4, a3, a1 +; RV32IB-NEXT: cmov a4, a4, a2, a0 +; RV32IB-NEXT: xor a5, a1, a3 +; RV32IB-NEXT: maxu a0, a0, a2 +; RV32IB-NEXT: cmov a0, a5, a0, a4 ; RV32IB-NEXT: maxu a1, a1, a3 ; RV32IB-NEXT: ret ; @@ -1236,13 +1196,14 @@ define i64 @abs_i64(i64 %x) { ; ; RV32IB-LABEL: abs_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: bgez a1, .LBB27_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: snez a2, a0 +; RV32IB-NEXT: neg a2, a0 +; RV32IB-NEXT: slti a3, a1, 0 +; RV32IB-NEXT: cmov a2, a3, a0, a2 +; RV32IB-NEXT: snez a0, a0 +; RV32IB-NEXT: add a0, a1, a0 ; RV32IB-NEXT: neg a0, a0 -; RV32IB-NEXT: add a1, a1, a2 -; RV32IB-NEXT: neg a1, a1 -; RV32IB-NEXT: .LBB27_2: +; RV32IB-NEXT: cmov a1, a3, a1, a0 +; RV32IB-NEXT: mv a0, a2 ; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: abs_i64: diff --git a/llvm/test/CodeGen/RISCV/rv32Zbbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll index 0ae6d757287fe5..3a18b17b9d9549 100644 --- a/llvm/test/CodeGen/RISCV/rv32Zbbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll @@ -264,48 
+264,37 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: rol_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: andi a3, a2, 63 -; RV32IB-NEXT: addi t1, a3, -32 +; RV32IB-NEXT: sll a7, a1, a2 +; RV32IB-NEXT: andi a4, a2, 63 ; RV32IB-NEXT: addi a6, zero, 31 -; RV32IB-NEXT: bltz t1, .LBB7_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: sll a7, a0, t1 -; RV32IB-NEXT: j .LBB7_3 -; RV32IB-NEXT: .LBB7_2: -; RV32IB-NEXT: sll a4, a1, a2 -; RV32IB-NEXT: sub a3, a6, a3 -; RV32IB-NEXT: srli a5, a0, 1 -; RV32IB-NEXT: srl a3, a5, a3 -; RV32IB-NEXT: or a7, a4, a3 -; RV32IB-NEXT: .LBB7_3: -; RV32IB-NEXT: neg a4, a2 -; RV32IB-NEXT: andi a5, a4, 63 -; RV32IB-NEXT: addi a3, a5, -32 -; RV32IB-NEXT: bltz a3, .LBB7_7 -; RV32IB-NEXT: # %bb.4: -; RV32IB-NEXT: mv t0, zero -; RV32IB-NEXT: bgez a3, .LBB7_8 -; RV32IB-NEXT: .LBB7_5: -; RV32IB-NEXT: srl a3, a0, a4 -; RV32IB-NEXT: sub a4, a6, a5 -; RV32IB-NEXT: slli a1, a1, 1 -; RV32IB-NEXT: sll a1, a1, a4 -; RV32IB-NEXT: or a4, a3, a1 -; RV32IB-NEXT: or a1, a7, t0 -; RV32IB-NEXT: bgez t1, .LBB7_9 -; RV32IB-NEXT: .LBB7_6: +; RV32IB-NEXT: sub a5, a6, a4 +; RV32IB-NEXT: srli a3, a0, 1 +; RV32IB-NEXT: srl a3, a3, a5 +; RV32IB-NEXT: or a7, a7, a3 +; RV32IB-NEXT: addi t1, a4, -32 +; RV32IB-NEXT: sll a5, a0, t1 +; RV32IB-NEXT: slti a3, t1, 0 +; RV32IB-NEXT: cmov a7, a3, a5, a7 +; RV32IB-NEXT: neg a5, a2 +; RV32IB-NEXT: srl t0, a1, a5 +; RV32IB-NEXT: andi t2, a5, 63 +; RV32IB-NEXT: addi a4, t2, -32 +; RV32IB-NEXT: srai a3, a4, 31 +; RV32IB-NEXT: and a3, a3, t0 +; RV32IB-NEXT: or a7, a7, a3 +; RV32IB-NEXT: srl t0, a0, a5 +; RV32IB-NEXT: sub a5, a6, t2 +; RV32IB-NEXT: slli a3, a1, 1 +; RV32IB-NEXT: sll a3, a3, a5 +; RV32IB-NEXT: or a3, t0, a3 +; RV32IB-NEXT: srl a1, a1, a4 +; RV32IB-NEXT: slti a4, a4, 0 +; RV32IB-NEXT: cmov a1, a4, a1, a3 ; RV32IB-NEXT: sll a0, a0, a2 -; RV32IB-NEXT: or a0, a0, a4 -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB7_7: -; RV32IB-NEXT: srl t0, a1, a4 -; RV32IB-NEXT: bltz a3, .LBB7_5 -; RV32IB-NEXT: .LBB7_8: -; RV32IB-NEXT: srl 
a4, a1, a3 -; RV32IB-NEXT: or a1, a7, t0 -; RV32IB-NEXT: bltz t1, .LBB7_6 -; RV32IB-NEXT: .LBB7_9: -; RV32IB-NEXT: or a0, zero, a4 +; RV32IB-NEXT: srai a2, t1, 31 +; RV32IB-NEXT: and a0, a2, a0 +; RV32IB-NEXT: or a0, a0, a1 +; RV32IB-NEXT: mv a1, a7 ; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: rol_i64: @@ -488,48 +477,37 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: ror_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: andi a3, a2, 63 -; RV32IB-NEXT: addi t1, a3, -32 +; RV32IB-NEXT: srl a7, a0, a2 +; RV32IB-NEXT: andi a4, a2, 63 ; RV32IB-NEXT: addi a6, zero, 31 -; RV32IB-NEXT: bltz t1, .LBB9_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: srl a7, a1, t1 -; RV32IB-NEXT: j .LBB9_3 -; RV32IB-NEXT: .LBB9_2: -; RV32IB-NEXT: srl a4, a0, a2 -; RV32IB-NEXT: sub a3, a6, a3 -; RV32IB-NEXT: slli a5, a1, 1 -; RV32IB-NEXT: sll a3, a5, a3 -; RV32IB-NEXT: or a7, a4, a3 -; RV32IB-NEXT: .LBB9_3: -; RV32IB-NEXT: neg a4, a2 -; RV32IB-NEXT: andi a5, a4, 63 -; RV32IB-NEXT: addi a3, a5, -32 -; RV32IB-NEXT: bltz a3, .LBB9_7 -; RV32IB-NEXT: # %bb.4: -; RV32IB-NEXT: mv t0, zero -; RV32IB-NEXT: bgez a3, .LBB9_8 -; RV32IB-NEXT: .LBB9_5: -; RV32IB-NEXT: sll a3, a1, a4 -; RV32IB-NEXT: sub a4, a6, a5 -; RV32IB-NEXT: srli a0, a0, 1 -; RV32IB-NEXT: srl a0, a0, a4 -; RV32IB-NEXT: or a4, a3, a0 -; RV32IB-NEXT: or a0, a7, t0 -; RV32IB-NEXT: bgez t1, .LBB9_9 -; RV32IB-NEXT: .LBB9_6: +; RV32IB-NEXT: sub a5, a6, a4 +; RV32IB-NEXT: slli a3, a1, 1 +; RV32IB-NEXT: sll a3, a3, a5 +; RV32IB-NEXT: or a7, a7, a3 +; RV32IB-NEXT: addi t1, a4, -32 +; RV32IB-NEXT: srl a5, a1, t1 +; RV32IB-NEXT: slti a3, t1, 0 +; RV32IB-NEXT: cmov a7, a3, a5, a7 +; RV32IB-NEXT: neg a5, a2 +; RV32IB-NEXT: sll t0, a0, a5 +; RV32IB-NEXT: andi t2, a5, 63 +; RV32IB-NEXT: addi a4, t2, -32 +; RV32IB-NEXT: srai a3, a4, 31 +; RV32IB-NEXT: and a3, a3, t0 +; RV32IB-NEXT: or a7, a7, a3 +; RV32IB-NEXT: sll t0, a1, a5 +; RV32IB-NEXT: sub a5, a6, t2 +; RV32IB-NEXT: srli a3, a0, 1 +; RV32IB-NEXT: srl a3, a3, a5 +; RV32IB-NEXT: or a3, t0, a3 +; 
RV32IB-NEXT: sll a0, a0, a4 +; RV32IB-NEXT: slti a4, a4, 0 +; RV32IB-NEXT: cmov a0, a4, a0, a3 ; RV32IB-NEXT: srl a1, a1, a2 -; RV32IB-NEXT: or a1, a1, a4 -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB9_7: -; RV32IB-NEXT: sll t0, a0, a4 -; RV32IB-NEXT: bltz a3, .LBB9_5 -; RV32IB-NEXT: .LBB9_8: -; RV32IB-NEXT: sll a4, a0, a3 -; RV32IB-NEXT: or a0, a7, t0 -; RV32IB-NEXT: bltz t1, .LBB9_6 -; RV32IB-NEXT: .LBB9_9: -; RV32IB-NEXT: or a1, zero, a4 +; RV32IB-NEXT: srai a2, t1, 31 +; RV32IB-NEXT: and a1, a2, a1 +; RV32IB-NEXT: or a1, a1, a0 +; RV32IB-NEXT: mv a0, a7 ; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: ror_i64: diff --git a/llvm/test/CodeGen/RISCV/rv32Zbs.ll b/llvm/test/CodeGen/RISCV/rv32Zbs.ll index ebe33744725a0d..440c41bada7654 100644 --- a/llvm/test/CodeGen/RISCV/rv32Zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv32Zbs.ll @@ -80,17 +80,14 @@ define i64 @sbclr_i64(i64 %a, i64 %b) nounwind { ; RV32IB: # %bb.0: ; RV32IB-NEXT: andi a3, a2, 63 ; RV32IB-NEXT: addi a3, a3, -32 -; RV32IB-NEXT: bltz a3, .LBB2_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: mv a2, zero -; RV32IB-NEXT: sbset a3, zero, a3 -; RV32IB-NEXT: j .LBB2_3 -; RV32IB-NEXT: .LBB2_2: -; RV32IB-NEXT: mv a3, zero +; RV32IB-NEXT: slti a4, a3, 0 +; RV32IB-NEXT: sbset a5, zero, a3 +; RV32IB-NEXT: cmov a4, a4, a5, zero ; RV32IB-NEXT: sbset a2, zero, a2 -; RV32IB-NEXT: .LBB2_3: +; RV32IB-NEXT: srai a3, a3, 31 +; RV32IB-NEXT: and a2, a3, a2 +; RV32IB-NEXT: andn a1, a1, a4 ; RV32IB-NEXT: andn a0, a0, a2 -; RV32IB-NEXT: andn a1, a1, a3 ; RV32IB-NEXT: ret ; ; RV32IBS-LABEL: sbclr_i64: @@ -239,15 +236,13 @@ define signext i64 @sbset_i64_zero(i64 signext %a) nounwind { ; ; RV32IB-LABEL: sbset_i64_zero: ; RV32IB: # %bb.0: -; RV32IB-NEXT: addi a1, a0, -32 -; RV32IB-NEXT: bltz a1, .LBB7_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: mv a0, zero -; RV32IB-NEXT: sbset a1, zero, a1 -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB7_2: -; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: addi a2, a0, -32 +; RV32IB-NEXT: slti a1, a2, 0 +; RV32IB-NEXT: sbset a3, zero, 
a2 +; RV32IB-NEXT: cmov a1, a1, a3, zero ; RV32IB-NEXT: sbset a0, zero, a0 +; RV32IB-NEXT: srai a2, a2, 31 +; RV32IB-NEXT: and a0, a2, a0 ; RV32IB-NEXT: ret ; ; RV32IBS-LABEL: sbset_i64_zero: @@ -398,20 +393,17 @@ define i64 @sbext_i64(i64 %a, i64 %b) nounwind { ; ; RV32IB-LABEL: sbext_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: andi a3, a2, 63 -; RV32IB-NEXT: addi a4, a3, -32 -; RV32IB-NEXT: bltz a4, .LBB12_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: srl a0, a1, a4 -; RV32IB-NEXT: j .LBB12_3 -; RV32IB-NEXT: .LBB12_2: ; RV32IB-NEXT: srl a0, a0, a2 -; RV32IB-NEXT: addi a2, zero, 31 -; RV32IB-NEXT: sub a2, a2, a3 -; RV32IB-NEXT: slli a1, a1, 1 -; RV32IB-NEXT: sll a1, a1, a2 -; RV32IB-NEXT: or a0, a0, a1 -; RV32IB-NEXT: .LBB12_3: +; RV32IB-NEXT: andi a2, a2, 63 +; RV32IB-NEXT: addi a3, zero, 31 +; RV32IB-NEXT: sub a3, a3, a2 +; RV32IB-NEXT: slli a4, a1, 1 +; RV32IB-NEXT: sll a3, a4, a3 +; RV32IB-NEXT: or a0, a0, a3 +; RV32IB-NEXT: addi a2, a2, -32 +; RV32IB-NEXT: srl a1, a1, a2 +; RV32IB-NEXT: slti a2, a2, 0 +; RV32IB-NEXT: cmov a0, a2, a1, a0 ; RV32IB-NEXT: andi a0, a0, 1 ; RV32IB-NEXT: mv a1, zero ; RV32IB-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32Zbt.ll b/llvm/test/CodeGen/RISCV/rv32Zbt.ll index 7011698c13a9f4..c26b1c31b03272 100644 --- a/llvm/test/CodeGen/RISCV/rv32Zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv32Zbt.ll @@ -98,20 +98,14 @@ define i32 @cmov_sle_i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ; RV32IB-LABEL: cmov_sle_i32: ; RV32IB: # %bb.0: -; RV32IB-NEXT: bge a2, a1, .LBB3_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: mv a3, a0 -; RV32IB-NEXT: .LBB3_2: -; RV32IB-NEXT: mv a0, a3 +; RV32IB-NEXT: slt a1, a2, a1 +; RV32IB-NEXT: cmov a0, a1, a3, a0 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: cmov_sle_i32: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: bge a2, a1, .LBB3_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: mv a3, a0 -; RV32IBT-NEXT: .LBB3_2: -; RV32IBT-NEXT: mv a0, a3 +; RV32IBT-NEXT: slt a1, a2, a1 +; RV32IBT-NEXT: cmov a0, a1, a3, a0 ; RV32IBT-NEXT: ret %tobool = 
icmp sle i32 %b, %c %cond = select i1 %tobool, i32 %d, i32 %a @@ -130,20 +124,14 @@ define i32 @cmov_sge_i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ; RV32IB-LABEL: cmov_sge_i32: ; RV32IB: # %bb.0: -; RV32IB-NEXT: bge a1, a2, .LBB4_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: mv a3, a0 -; RV32IB-NEXT: .LBB4_2: -; RV32IB-NEXT: mv a0, a3 +; RV32IB-NEXT: slt a1, a1, a2 +; RV32IB-NEXT: cmov a0, a1, a3, a0 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: cmov_sge_i32: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: bge a1, a2, .LBB4_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: mv a3, a0 -; RV32IBT-NEXT: .LBB4_2: -; RV32IBT-NEXT: mv a0, a3 +; RV32IBT-NEXT: slt a1, a1, a2 +; RV32IBT-NEXT: cmov a0, a1, a3, a0 ; RV32IBT-NEXT: ret %tobool = icmp sge i32 %b, %c %cond = select i1 %tobool, i32 %d, i32 %a @@ -162,20 +150,14 @@ define i32 @cmov_ule_i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ; RV32IB-LABEL: cmov_ule_i32: ; RV32IB: # %bb.0: -; RV32IB-NEXT: bgeu a2, a1, .LBB5_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: mv a3, a0 -; RV32IB-NEXT: .LBB5_2: -; RV32IB-NEXT: mv a0, a3 +; RV32IB-NEXT: sltu a1, a2, a1 +; RV32IB-NEXT: cmov a0, a1, a3, a0 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: cmov_ule_i32: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: bgeu a2, a1, .LBB5_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: mv a3, a0 -; RV32IBT-NEXT: .LBB5_2: -; RV32IBT-NEXT: mv a0, a3 +; RV32IBT-NEXT: sltu a1, a2, a1 +; RV32IBT-NEXT: cmov a0, a1, a3, a0 ; RV32IBT-NEXT: ret %tobool = icmp ule i32 %b, %c %cond = select i1 %tobool, i32 %d, i32 %a @@ -194,20 +176,14 @@ define i32 @cmov_uge_i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ; RV32IB-LABEL: cmov_uge_i32: ; RV32IB: # %bb.0: -; RV32IB-NEXT: bgeu a1, a2, .LBB6_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: mv a3, a0 -; RV32IB-NEXT: .LBB6_2: -; RV32IB-NEXT: mv a0, a3 +; RV32IB-NEXT: sltu a1, a1, a2 +; RV32IB-NEXT: cmov a0, a1, a3, a0 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: cmov_uge_i32: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: bgeu a1, a2, .LBB6_2 -; RV32IBT-NEXT: # %bb.1: -; 
RV32IBT-NEXT: mv a3, a0 -; RV32IBT-NEXT: .LBB6_2: -; RV32IBT-NEXT: mv a0, a3 +; RV32IBT-NEXT: sltu a1, a1, a2 +; RV32IBT-NEXT: cmov a0, a1, a3, a0 ; RV32IBT-NEXT: ret %tobool = icmp uge i32 %b, %c %cond = select i1 %tobool, i32 %d, i32 %a @@ -268,42 +244,26 @@ define i64 @cmov_sle_i64(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; ; RV32IB-LABEL: cmov_sle_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: beq a3, a5, .LBB8_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: slt a2, a5, a3 -; RV32IB-NEXT: xori a2, a2, 1 -; RV32IB-NEXT: beqz a2, .LBB8_3 -; RV32IB-NEXT: j .LBB8_4 -; RV32IB-NEXT: .LBB8_2: +; RV32IB-NEXT: xor t0, a3, a5 +; RV32IB-NEXT: slt a3, a5, a3 +; RV32IB-NEXT: xori a3, a3, 1 ; RV32IB-NEXT: sltu a2, a4, a2 ; RV32IB-NEXT: xori a2, a2, 1 -; RV32IB-NEXT: bnez a2, .LBB8_4 -; RV32IB-NEXT: .LBB8_3: -; RV32IB-NEXT: mv a6, a0 -; RV32IB-NEXT: mv a7, a1 -; RV32IB-NEXT: .LBB8_4: -; RV32IB-NEXT: mv a0, a6 -; RV32IB-NEXT: mv a1, a7 +; RV32IB-NEXT: cmov a2, t0, a2, a3 +; RV32IB-NEXT: cmov a0, a2, a0, a6 +; RV32IB-NEXT: cmov a1, a2, a1, a7 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: cmov_sle_i64: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: beq a3, a5, .LBB8_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: slt a2, a5, a3 -; RV32IBT-NEXT: xori a2, a2, 1 -; RV32IBT-NEXT: beqz a2, .LBB8_3 -; RV32IBT-NEXT: j .LBB8_4 -; RV32IBT-NEXT: .LBB8_2: +; RV32IBT-NEXT: xor t0, a3, a5 +; RV32IBT-NEXT: slt a3, a5, a3 +; RV32IBT-NEXT: xori a3, a3, 1 ; RV32IBT-NEXT: sltu a2, a4, a2 ; RV32IBT-NEXT: xori a2, a2, 1 -; RV32IBT-NEXT: bnez a2, .LBB8_4 -; RV32IBT-NEXT: .LBB8_3: -; RV32IBT-NEXT: mv a6, a0 -; RV32IBT-NEXT: mv a7, a1 -; RV32IBT-NEXT: .LBB8_4: -; RV32IBT-NEXT: mv a0, a6 -; RV32IBT-NEXT: mv a1, a7 +; RV32IBT-NEXT: cmov a2, t0, a2, a3 +; RV32IBT-NEXT: cmov a0, a2, a0, a6 +; RV32IBT-NEXT: cmov a1, a2, a1, a7 ; RV32IBT-NEXT: ret %tobool = icmp sle i64 %b, %c %cond = select i1 %tobool, i64 %d, i64 %a @@ -333,42 +293,26 @@ define i64 @cmov_sge_i64(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; ; RV32IB-LABEL: cmov_sge_i64: 
; RV32IB: # %bb.0: -; RV32IB-NEXT: beq a3, a5, .LBB9_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: slt a2, a3, a5 -; RV32IB-NEXT: xori a2, a2, 1 -; RV32IB-NEXT: beqz a2, .LBB9_3 -; RV32IB-NEXT: j .LBB9_4 -; RV32IB-NEXT: .LBB9_2: +; RV32IB-NEXT: xor t0, a3, a5 +; RV32IB-NEXT: slt a3, a3, a5 +; RV32IB-NEXT: xori a3, a3, 1 ; RV32IB-NEXT: sltu a2, a2, a4 ; RV32IB-NEXT: xori a2, a2, 1 -; RV32IB-NEXT: bnez a2, .LBB9_4 -; RV32IB-NEXT: .LBB9_3: -; RV32IB-NEXT: mv a6, a0 -; RV32IB-NEXT: mv a7, a1 -; RV32IB-NEXT: .LBB9_4: -; RV32IB-NEXT: mv a0, a6 -; RV32IB-NEXT: mv a1, a7 +; RV32IB-NEXT: cmov a2, t0, a2, a3 +; RV32IB-NEXT: cmov a0, a2, a0, a6 +; RV32IB-NEXT: cmov a1, a2, a1, a7 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: cmov_sge_i64: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: beq a3, a5, .LBB9_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: slt a2, a3, a5 -; RV32IBT-NEXT: xori a2, a2, 1 -; RV32IBT-NEXT: beqz a2, .LBB9_3 -; RV32IBT-NEXT: j .LBB9_4 -; RV32IBT-NEXT: .LBB9_2: +; RV32IBT-NEXT: xor t0, a3, a5 +; RV32IBT-NEXT: slt a3, a3, a5 +; RV32IBT-NEXT: xori a3, a3, 1 ; RV32IBT-NEXT: sltu a2, a2, a4 ; RV32IBT-NEXT: xori a2, a2, 1 -; RV32IBT-NEXT: bnez a2, .LBB9_4 -; RV32IBT-NEXT: .LBB9_3: -; RV32IBT-NEXT: mv a6, a0 -; RV32IBT-NEXT: mv a7, a1 -; RV32IBT-NEXT: .LBB9_4: -; RV32IBT-NEXT: mv a0, a6 -; RV32IBT-NEXT: mv a1, a7 +; RV32IBT-NEXT: cmov a2, t0, a2, a3 +; RV32IBT-NEXT: cmov a0, a2, a0, a6 +; RV32IBT-NEXT: cmov a1, a2, a1, a7 ; RV32IBT-NEXT: ret %tobool = icmp sge i64 %b, %c %cond = select i1 %tobool, i64 %d, i64 %a @@ -398,42 +342,26 @@ define i64 @cmov_ule_i64(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; ; RV32IB-LABEL: cmov_ule_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: beq a3, a5, .LBB10_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: sltu a2, a5, a3 -; RV32IB-NEXT: xori a2, a2, 1 -; RV32IB-NEXT: beqz a2, .LBB10_3 -; RV32IB-NEXT: j .LBB10_4 -; RV32IB-NEXT: .LBB10_2: +; RV32IB-NEXT: xor t0, a3, a5 +; RV32IB-NEXT: sltu a3, a5, a3 +; RV32IB-NEXT: xori a3, a3, 1 ; RV32IB-NEXT: sltu a2, a4, a2 ; 
RV32IB-NEXT: xori a2, a2, 1 -; RV32IB-NEXT: bnez a2, .LBB10_4 -; RV32IB-NEXT: .LBB10_3: -; RV32IB-NEXT: mv a6, a0 -; RV32IB-NEXT: mv a7, a1 -; RV32IB-NEXT: .LBB10_4: -; RV32IB-NEXT: mv a0, a6 -; RV32IB-NEXT: mv a1, a7 +; RV32IB-NEXT: cmov a2, t0, a2, a3 +; RV32IB-NEXT: cmov a0, a2, a0, a6 +; RV32IB-NEXT: cmov a1, a2, a1, a7 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: cmov_ule_i64: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: beq a3, a5, .LBB10_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: sltu a2, a5, a3 -; RV32IBT-NEXT: xori a2, a2, 1 -; RV32IBT-NEXT: beqz a2, .LBB10_3 -; RV32IBT-NEXT: j .LBB10_4 -; RV32IBT-NEXT: .LBB10_2: +; RV32IBT-NEXT: xor t0, a3, a5 +; RV32IBT-NEXT: sltu a3, a5, a3 +; RV32IBT-NEXT: xori a3, a3, 1 ; RV32IBT-NEXT: sltu a2, a4, a2 ; RV32IBT-NEXT: xori a2, a2, 1 -; RV32IBT-NEXT: bnez a2, .LBB10_4 -; RV32IBT-NEXT: .LBB10_3: -; RV32IBT-NEXT: mv a6, a0 -; RV32IBT-NEXT: mv a7, a1 -; RV32IBT-NEXT: .LBB10_4: -; RV32IBT-NEXT: mv a0, a6 -; RV32IBT-NEXT: mv a1, a7 +; RV32IBT-NEXT: cmov a2, t0, a2, a3 +; RV32IBT-NEXT: cmov a0, a2, a0, a6 +; RV32IBT-NEXT: cmov a1, a2, a1, a7 ; RV32IBT-NEXT: ret %tobool = icmp ule i64 %b, %c %cond = select i1 %tobool, i64 %d, i64 %a @@ -463,42 +391,26 @@ define i64 @cmov_uge_i64(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; ; RV32IB-LABEL: cmov_uge_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: beq a3, a5, .LBB11_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: sltu a2, a3, a5 -; RV32IB-NEXT: xori a2, a2, 1 -; RV32IB-NEXT: beqz a2, .LBB11_3 -; RV32IB-NEXT: j .LBB11_4 -; RV32IB-NEXT: .LBB11_2: +; RV32IB-NEXT: xor t0, a3, a5 +; RV32IB-NEXT: sltu a3, a3, a5 +; RV32IB-NEXT: xori a3, a3, 1 ; RV32IB-NEXT: sltu a2, a2, a4 ; RV32IB-NEXT: xori a2, a2, 1 -; RV32IB-NEXT: bnez a2, .LBB11_4 -; RV32IB-NEXT: .LBB11_3: -; RV32IB-NEXT: mv a6, a0 -; RV32IB-NEXT: mv a7, a1 -; RV32IB-NEXT: .LBB11_4: -; RV32IB-NEXT: mv a0, a6 -; RV32IB-NEXT: mv a1, a7 +; RV32IB-NEXT: cmov a2, t0, a2, a3 +; RV32IB-NEXT: cmov a0, a2, a0, a6 +; RV32IB-NEXT: cmov a1, a2, a1, a7 ; RV32IB-NEXT: 
ret ; ; RV32IBT-LABEL: cmov_uge_i64: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: beq a3, a5, .LBB11_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: sltu a2, a3, a5 -; RV32IBT-NEXT: xori a2, a2, 1 -; RV32IBT-NEXT: beqz a2, .LBB11_3 -; RV32IBT-NEXT: j .LBB11_4 -; RV32IBT-NEXT: .LBB11_2: +; RV32IBT-NEXT: xor t0, a3, a5 +; RV32IBT-NEXT: sltu a3, a3, a5 +; RV32IBT-NEXT: xori a3, a3, 1 ; RV32IBT-NEXT: sltu a2, a2, a4 ; RV32IBT-NEXT: xori a2, a2, 1 -; RV32IBT-NEXT: bnez a2, .LBB11_4 -; RV32IBT-NEXT: .LBB11_3: -; RV32IBT-NEXT: mv a6, a0 -; RV32IBT-NEXT: mv a7, a1 -; RV32IBT-NEXT: .LBB11_4: -; RV32IBT-NEXT: mv a0, a6 -; RV32IBT-NEXT: mv a1, a7 +; RV32IBT-NEXT: cmov a2, t0, a2, a3 +; RV32IBT-NEXT: cmov a0, a2, a0, a6 +; RV32IBT-NEXT: cmov a1, a2, a1, a7 ; RV32IBT-NEXT: ret %tobool = icmp uge i64 %b, %c %cond = select i1 %tobool, i64 %d, i64 %a @@ -593,99 +505,75 @@ define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { ; ; RV32IB-LABEL: fshl_i64: ; RV32IB: # %bb.0: +; RV32IB-NEXT: sll a7, a1, a4 ; RV32IB-NEXT: andi a5, a4, 63 -; RV32IB-NEXT: addi t2, a5, -32 ; RV32IB-NEXT: addi a6, zero, 31 -; RV32IB-NEXT: bltz t2, .LBB13_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: sll a7, a0, t2 -; RV32IB-NEXT: j .LBB13_3 -; RV32IB-NEXT: .LBB13_2: -; RV32IB-NEXT: sll a7, a1, a4 -; RV32IB-NEXT: sub a5, a6, a5 +; RV32IB-NEXT: sub t0, a6, a5 ; RV32IB-NEXT: srli a1, a0, 1 -; RV32IB-NEXT: srl a1, a1, a5 +; RV32IB-NEXT: srl a1, a1, t0 ; RV32IB-NEXT: or a7, a7, a1 -; RV32IB-NEXT: .LBB13_3: -; RV32IB-NEXT: not t1, a4 +; RV32IB-NEXT: addi t1, a5, -32 +; RV32IB-NEXT: sll t0, a0, t1 +; RV32IB-NEXT: slti a1, t1, 0 +; RV32IB-NEXT: cmov t0, a1, t0, a7 +; RV32IB-NEXT: not a7, a4 +; RV32IB-NEXT: srli t4, a3, 1 +; RV32IB-NEXT: srl t2, t4, a7 ; RV32IB-NEXT: addi a1, zero, 63 -; RV32IB-NEXT: andn a5, a1, a4 -; RV32IB-NEXT: addi a1, a5, -32 -; RV32IB-NEXT: srli t3, a3, 1 -; RV32IB-NEXT: bltz a1, .LBB13_7 -; RV32IB-NEXT: # %bb.4: -; RV32IB-NEXT: mv t0, zero -; RV32IB-NEXT: bgez a1, .LBB13_8 -; RV32IB-NEXT: .LBB13_5: -; 
RV32IB-NEXT: fsri a1, a2, a3, 1 -; RV32IB-NEXT: srl a1, a1, t1 -; RV32IB-NEXT: sub a2, a6, a5 -; RV32IB-NEXT: slli a3, t3, 1 -; RV32IB-NEXT: sll a2, a3, a2 -; RV32IB-NEXT: or a2, a1, a2 -; RV32IB-NEXT: or a1, a7, t0 -; RV32IB-NEXT: bgez t2, .LBB13_9 -; RV32IB-NEXT: .LBB13_6: +; RV32IB-NEXT: andn t3, a1, a4 +; RV32IB-NEXT: addi a5, t3, -32 +; RV32IB-NEXT: srai a1, a5, 31 +; RV32IB-NEXT: and a1, a1, t2 +; RV32IB-NEXT: or a1, t0, a1 +; RV32IB-NEXT: fsri a2, a2, a3, 1 +; RV32IB-NEXT: srl a7, a2, a7 +; RV32IB-NEXT: sub a3, a6, t3 +; RV32IB-NEXT: slli a2, t4, 1 +; RV32IB-NEXT: sll a2, a2, a3 +; RV32IB-NEXT: or a2, a7, a2 +; RV32IB-NEXT: srl a3, t4, a5 +; RV32IB-NEXT: slti a5, a5, 0 +; RV32IB-NEXT: cmov a2, a5, a3, a2 ; RV32IB-NEXT: sll a0, a0, a4 +; RV32IB-NEXT: srai a3, t1, 31 +; RV32IB-NEXT: and a0, a3, a0 ; RV32IB-NEXT: or a0, a0, a2 ; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB13_7: -; RV32IB-NEXT: srl t0, t3, t1 -; RV32IB-NEXT: bltz a1, .LBB13_5 -; RV32IB-NEXT: .LBB13_8: -; RV32IB-NEXT: srl a2, t3, a1 -; RV32IB-NEXT: or a1, a7, t0 -; RV32IB-NEXT: bltz t2, .LBB13_6 -; RV32IB-NEXT: .LBB13_9: -; RV32IB-NEXT: or a0, zero, a2 -; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: fshl_i64: ; RV32IBT: # %bb.0: +; RV32IBT-NEXT: sll a7, a1, a4 ; RV32IBT-NEXT: andi a5, a4, 63 -; RV32IBT-NEXT: addi t1, a5, -32 ; RV32IBT-NEXT: addi a6, zero, 31 -; RV32IBT-NEXT: bltz t1, .LBB13_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: sll a7, a0, t1 -; RV32IBT-NEXT: j .LBB13_3 -; RV32IBT-NEXT: .LBB13_2: -; RV32IBT-NEXT: sll a7, a1, a4 -; RV32IBT-NEXT: sub a5, a6, a5 +; RV32IBT-NEXT: sub t0, a6, a5 ; RV32IBT-NEXT: srli a1, a0, 1 -; RV32IBT-NEXT: srl a1, a1, a5 +; RV32IBT-NEXT: srl a1, a1, t0 ; RV32IBT-NEXT: or a7, a7, a1 -; RV32IBT-NEXT: .LBB13_3: -; RV32IBT-NEXT: not a1, a4 -; RV32IBT-NEXT: andi t3, a1, 63 -; RV32IBT-NEXT: addi a5, t3, -32 -; RV32IBT-NEXT: srli t2, a3, 1 -; RV32IBT-NEXT: bltz a5, .LBB13_7 -; RV32IBT-NEXT: # %bb.4: -; RV32IBT-NEXT: mv t0, zero -; RV32IBT-NEXT: bgez a5, .LBB13_8 -; RV32IBT-NEXT: 
.LBB13_5: +; RV32IBT-NEXT: addi t1, a5, -32 +; RV32IBT-NEXT: sll t0, a0, t1 +; RV32IBT-NEXT: slti a1, t1, 0 +; RV32IBT-NEXT: cmov t0, a1, t0, a7 +; RV32IBT-NEXT: not a5, a4 +; RV32IBT-NEXT: srli a7, a3, 1 +; RV32IBT-NEXT: srl t4, a7, a5 +; RV32IBT-NEXT: andi t2, a5, 63 +; RV32IBT-NEXT: addi t3, t2, -32 +; RV32IBT-NEXT: srai a1, t3, 31 +; RV32IBT-NEXT: and a1, a1, t4 +; RV32IBT-NEXT: or a1, t0, a1 ; RV32IBT-NEXT: fsri a2, a2, a3, 1 -; RV32IBT-NEXT: srl a1, a2, a1 -; RV32IBT-NEXT: sub a2, a6, t3 -; RV32IBT-NEXT: slli a3, t2, 1 -; RV32IBT-NEXT: sll a2, a3, a2 -; RV32IBT-NEXT: or a2, a1, a2 -; RV32IBT-NEXT: or a1, a7, t0 -; RV32IBT-NEXT: bgez t1, .LBB13_9 -; RV32IBT-NEXT: .LBB13_6: +; RV32IBT-NEXT: srl a2, a2, a5 +; RV32IBT-NEXT: sub a3, a6, t2 +; RV32IBT-NEXT: slli a5, a7, 1 +; RV32IBT-NEXT: sll a3, a5, a3 +; RV32IBT-NEXT: or a2, a2, a3 +; RV32IBT-NEXT: srl a3, a7, t3 +; RV32IBT-NEXT: slti a5, t3, 0 +; RV32IBT-NEXT: cmov a2, a5, a3, a2 ; RV32IBT-NEXT: sll a0, a0, a4 +; RV32IBT-NEXT: srai a3, t1, 31 +; RV32IBT-NEXT: and a0, a3, a0 ; RV32IBT-NEXT: or a0, a0, a2 -; RV32IBT-NEXT: ret -; RV32IBT-NEXT: .LBB13_7: -; RV32IBT-NEXT: srl t0, t2, a1 -; RV32IBT-NEXT: bltz a5, .LBB13_5 -; RV32IBT-NEXT: .LBB13_8: -; RV32IBT-NEXT: srl a2, t2, a5 -; RV32IBT-NEXT: or a1, a7, t0 -; RV32IBT-NEXT: bltz t1, .LBB13_6 -; RV32IBT-NEXT: .LBB13_9: -; RV32IBT-NEXT: or a0, zero, a2 ; RV32IBT-NEXT: ret %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) ret i64 %1 @@ -781,101 +669,79 @@ define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind { ; ; RV32IB-LABEL: fshr_i64: ; RV32IB: # %bb.0: +; RV32IB-NEXT: srl a7, a2, a4 ; RV32IB-NEXT: andi a5, a4, 63 -; RV32IB-NEXT: addi t2, a5, -32 ; RV32IB-NEXT: addi a6, zero, 31 -; RV32IB-NEXT: bltz t2, .LBB15_2 -; RV32IB-NEXT: # %bb.1: -; RV32IB-NEXT: srl a7, a3, t2 -; RV32IB-NEXT: j .LBB15_3 -; RV32IB-NEXT: .LBB15_2: -; RV32IB-NEXT: srl a7, a2, a4 -; RV32IB-NEXT: sub a5, a6, a5 +; RV32IB-NEXT: sub t0, a6, a5 ; RV32IB-NEXT: slli a2, a3, 1 -; RV32IB-NEXT: 
sll a2, a2, a5 +; RV32IB-NEXT: sll a2, a2, t0 ; RV32IB-NEXT: or a7, a7, a2 -; RV32IB-NEXT: .LBB15_3: -; RV32IB-NEXT: not t1, a4 -; RV32IB-NEXT: addi a2, zero, 63 -; RV32IB-NEXT: andn a2, a2, a4 -; RV32IB-NEXT: addi a5, a2, -32 -; RV32IB-NEXT: slli t3, a0, 1 -; RV32IB-NEXT: bltz a5, .LBB15_7 -; RV32IB-NEXT: # %bb.4: -; RV32IB-NEXT: mv t0, zero -; RV32IB-NEXT: bgez a5, .LBB15_8 -; RV32IB-NEXT: .LBB15_5: +; RV32IB-NEXT: addi t2, a5, -32 +; RV32IB-NEXT: srl t0, a3, t2 +; RV32IB-NEXT: slti a2, t2, 0 +; RV32IB-NEXT: cmov a7, a2, t0, a7 +; RV32IB-NEXT: not t3, a4 +; RV32IB-NEXT: slli t0, a0, 1 +; RV32IB-NEXT: sll t1, t0, t3 +; RV32IB-NEXT: addi a5, zero, 63 +; RV32IB-NEXT: andn t4, a5, a4 +; RV32IB-NEXT: addi a2, t4, -32 +; RV32IB-NEXT: srai a5, a2, 31 +; RV32IB-NEXT: and a5, a5, t1 +; RV32IB-NEXT: or a7, a5, a7 ; RV32IB-NEXT: fsri a1, a0, a1, 31 -; RV32IB-NEXT: sll a1, a1, t1 -; RV32IB-NEXT: sub a2, a6, a2 +; RV32IB-NEXT: sll a1, a1, t3 +; RV32IB-NEXT: sub a5, a6, t4 ; RV32IB-NEXT: sbclri a0, a0, 31 -; RV32IB-NEXT: srl a0, a0, a2 -; RV32IB-NEXT: or a1, a1, a0 -; RV32IB-NEXT: or a0, t0, a7 -; RV32IB-NEXT: bgez t2, .LBB15_9 -; RV32IB-NEXT: .LBB15_6: -; RV32IB-NEXT: srl a2, a3, a4 -; RV32IB-NEXT: or a1, a1, a2 -; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB15_7: -; RV32IB-NEXT: sll t0, t3, t1 -; RV32IB-NEXT: bltz a5, .LBB15_5 -; RV32IB-NEXT: .LBB15_8: -; RV32IB-NEXT: sll a1, t3, a5 -; RV32IB-NEXT: or a0, t0, a7 -; RV32IB-NEXT: bltz t2, .LBB15_6 -; RV32IB-NEXT: .LBB15_9: -; RV32IB-NEXT: or a1, a1, zero +; RV32IB-NEXT: srl a0, a0, a5 +; RV32IB-NEXT: or a0, a1, a0 +; RV32IB-NEXT: sll a1, t0, a2 +; RV32IB-NEXT: slti a2, a2, 0 +; RV32IB-NEXT: cmov a0, a2, a1, a0 +; RV32IB-NEXT: srl a1, a3, a4 +; RV32IB-NEXT: srai a2, t2, 31 +; RV32IB-NEXT: and a1, a2, a1 +; RV32IB-NEXT: or a1, a0, a1 +; RV32IB-NEXT: mv a0, a7 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: fshr_i64: ; RV32IBT: # %bb.0: +; RV32IBT-NEXT: srl a7, a2, a4 ; RV32IBT-NEXT: andi a5, a4, 63 -; RV32IBT-NEXT: addi t1, a5, -32 ; 
RV32IBT-NEXT: addi a6, zero, 31 -; RV32IBT-NEXT: bltz t1, .LBB15_2 -; RV32IBT-NEXT: # %bb.1: -; RV32IBT-NEXT: srl a7, a3, t1 -; RV32IBT-NEXT: j .LBB15_3 -; RV32IBT-NEXT: .LBB15_2: -; RV32IBT-NEXT: srl a7, a2, a4 -; RV32IBT-NEXT: sub a5, a6, a5 +; RV32IBT-NEXT: sub t0, a6, a5 ; RV32IBT-NEXT: slli a2, a3, 1 -; RV32IBT-NEXT: sll a2, a2, a5 +; RV32IBT-NEXT: sll a2, a2, t0 ; RV32IBT-NEXT: or a7, a7, a2 -; RV32IBT-NEXT: .LBB15_3: -; RV32IBT-NEXT: not a2, a4 -; RV32IBT-NEXT: andi t2, a2, 63 -; RV32IBT-NEXT: addi a5, t2, -32 -; RV32IBT-NEXT: slli t3, a0, 1 -; RV32IBT-NEXT: bltz a5, .LBB15_7 -; RV32IBT-NEXT: # %bb.4: -; RV32IBT-NEXT: mv t0, zero -; RV32IBT-NEXT: bgez a5, .LBB15_8 -; RV32IBT-NEXT: .LBB15_5: -; RV32IBT-NEXT: lui a5, 524288 -; RV32IBT-NEXT: addi a5, a5, -1 -; RV32IBT-NEXT: and t3, a0, a5 -; RV32IBT-NEXT: sub a5, a6, t2 -; RV32IBT-NEXT: srl a5, t3, a5 +; RV32IBT-NEXT: addi t2, a5, -32 +; RV32IBT-NEXT: srl t0, a3, t2 +; RV32IBT-NEXT: slti a2, t2, 0 +; RV32IBT-NEXT: cmov a7, a2, t0, a7 +; RV32IBT-NEXT: not t4, a4 +; RV32IBT-NEXT: slli t0, a0, 1 +; RV32IBT-NEXT: sll t1, t0, t4 +; RV32IBT-NEXT: andi t3, t4, 63 +; RV32IBT-NEXT: addi a5, t3, -32 +; RV32IBT-NEXT: srai a2, a5, 31 +; RV32IBT-NEXT: and a2, a2, t1 +; RV32IBT-NEXT: or a7, a2, a7 +; RV32IBT-NEXT: lui a2, 524288 +; RV32IBT-NEXT: addi a2, a2, -1 +; RV32IBT-NEXT: and t1, a0, a2 +; RV32IBT-NEXT: sub a2, a6, t3 +; RV32IBT-NEXT: srl a2, t1, a2 ; RV32IBT-NEXT: fsri a0, a0, a1, 31 -; RV32IBT-NEXT: sll a0, a0, a2 -; RV32IBT-NEXT: or a1, a0, a5 -; RV32IBT-NEXT: or a0, t0, a7 -; RV32IBT-NEXT: bgez t1, .LBB15_9 -; RV32IBT-NEXT: .LBB15_6: -; RV32IBT-NEXT: srl a2, a3, a4 -; RV32IBT-NEXT: or a1, a1, a2 -; RV32IBT-NEXT: ret -; RV32IBT-NEXT: .LBB15_7: -; RV32IBT-NEXT: sll t0, t3, a2 -; RV32IBT-NEXT: bltz a5, .LBB15_5 -; RV32IBT-NEXT: .LBB15_8: -; RV32IBT-NEXT: sll a1, t3, a5 -; RV32IBT-NEXT: or a0, t0, a7 -; RV32IBT-NEXT: bltz t1, .LBB15_6 -; RV32IBT-NEXT: .LBB15_9: -; RV32IBT-NEXT: or a1, a1, zero +; RV32IBT-NEXT: sll a0, 
a0, t4 +; RV32IBT-NEXT: or a0, a0, a2 +; RV32IBT-NEXT: sll a1, t0, a5 +; RV32IBT-NEXT: slti a2, a5, 0 +; RV32IBT-NEXT: cmov a0, a2, a1, a0 +; RV32IBT-NEXT: srl a1, a3, a4 +; RV32IBT-NEXT: srai a2, t2, 31 +; RV32IBT-NEXT: and a1, a2, a1 +; RV32IBT-NEXT: or a1, a0, a1 +; RV32IBT-NEXT: mv a0, a7 ; RV32IBT-NEXT: ret %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll index b3eb9f2cff1f72..5d7ddc103f03dd 100644 --- a/llvm/test/CodeGen/RISCV/rv64Zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll @@ -92,20 +92,14 @@ define signext i32 @cmov_sle_i32(i32 signext %a, i32 signext %b, i32 signext %c, ; ; RV64IB-LABEL: cmov_sle_i32: ; RV64IB: # %bb.0: -; RV64IB-NEXT: bge a2, a1, .LBB3_2 -; RV64IB-NEXT: # %bb.1: -; RV64IB-NEXT: mv a3, a0 -; RV64IB-NEXT: .LBB3_2: -; RV64IB-NEXT: mv a0, a3 +; RV64IB-NEXT: slt a1, a2, a1 +; RV64IB-NEXT: cmov a0, a1, a3, a0 ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: cmov_sle_i32: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: bge a2, a1, .LBB3_2 -; RV64IBT-NEXT: # %bb.1: -; RV64IBT-NEXT: mv a3, a0 -; RV64IBT-NEXT: .LBB3_2: -; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: slt a1, a2, a1 +; RV64IBT-NEXT: cmov a0, a1, a3, a0 ; RV64IBT-NEXT: ret %tobool = icmp sle i32 %b, %c %cond = select i1 %tobool, i32 %d, i32 %a @@ -124,20 +118,14 @@ define signext i32 @cmov_sge_i32(i32 signext %a, i32 signext %b, i32 signext %c, ; ; RV64IB-LABEL: cmov_sge_i32: ; RV64IB: # %bb.0: -; RV64IB-NEXT: bge a1, a2, .LBB4_2 -; RV64IB-NEXT: # %bb.1: -; RV64IB-NEXT: mv a3, a0 -; RV64IB-NEXT: .LBB4_2: -; RV64IB-NEXT: mv a0, a3 +; RV64IB-NEXT: slt a1, a1, a2 +; RV64IB-NEXT: cmov a0, a1, a3, a0 ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: cmov_sge_i32: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: bge a1, a2, .LBB4_2 -; RV64IBT-NEXT: # %bb.1: -; RV64IBT-NEXT: mv a3, a0 -; RV64IBT-NEXT: .LBB4_2: -; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: slt a1, a1, a2 +; RV64IBT-NEXT: cmov a0, a1, a3, a0 ; RV64IBT-NEXT: ret %tobool = 
icmp sge i32 %b, %c %cond = select i1 %tobool, i32 %d, i32 %a @@ -156,20 +144,14 @@ define signext i32 @cmov_ule_i32(i32 signext %a, i32 signext %b, i32 signext %c, ; ; RV64IB-LABEL: cmov_ule_i32: ; RV64IB: # %bb.0: -; RV64IB-NEXT: bgeu a2, a1, .LBB5_2 -; RV64IB-NEXT: # %bb.1: -; RV64IB-NEXT: mv a3, a0 -; RV64IB-NEXT: .LBB5_2: -; RV64IB-NEXT: mv a0, a3 +; RV64IB-NEXT: sltu a1, a2, a1 +; RV64IB-NEXT: cmov a0, a1, a3, a0 ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: cmov_ule_i32: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: bgeu a2, a1, .LBB5_2 -; RV64IBT-NEXT: # %bb.1: -; RV64IBT-NEXT: mv a3, a0 -; RV64IBT-NEXT: .LBB5_2: -; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: sltu a1, a2, a1 +; RV64IBT-NEXT: cmov a0, a1, a3, a0 ; RV64IBT-NEXT: ret %tobool = icmp ule i32 %b, %c %cond = select i1 %tobool, i32 %d, i32 %a @@ -188,20 +170,14 @@ define signext i32 @cmov_uge_i32(i32 signext %a, i32 signext %b, i32 signext %c, ; ; RV64IB-LABEL: cmov_uge_i32: ; RV64IB: # %bb.0: -; RV64IB-NEXT: bgeu a1, a2, .LBB6_2 -; RV64IB-NEXT: # %bb.1: -; RV64IB-NEXT: mv a3, a0 -; RV64IB-NEXT: .LBB6_2: -; RV64IB-NEXT: mv a0, a3 +; RV64IB-NEXT: sltu a1, a1, a2 +; RV64IB-NEXT: cmov a0, a1, a3, a0 ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: cmov_uge_i32: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: bgeu a1, a2, .LBB6_2 -; RV64IBT-NEXT: # %bb.1: -; RV64IBT-NEXT: mv a3, a0 -; RV64IBT-NEXT: .LBB6_2: -; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: sltu a1, a1, a2 +; RV64IBT-NEXT: cmov a0, a1, a3, a0 ; RV64IBT-NEXT: ret %tobool = icmp uge i32 %b, %c %cond = select i1 %tobool, i32 %d, i32 %a @@ -244,20 +220,14 @@ define i64 @cmov_sle_i64(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; ; RV64IB-LABEL: cmov_sle_i64: ; RV64IB: # %bb.0: -; RV64IB-NEXT: bge a2, a1, .LBB8_2 -; RV64IB-NEXT: # %bb.1: -; RV64IB-NEXT: mv a3, a0 -; RV64IB-NEXT: .LBB8_2: -; RV64IB-NEXT: mv a0, a3 +; RV64IB-NEXT: slt a1, a2, a1 +; RV64IB-NEXT: cmov a0, a1, a3, a0 ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: cmov_sle_i64: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: bge a2, a1, .LBB8_2 -; 
RV64IBT-NEXT: # %bb.1: -; RV64IBT-NEXT: mv a3, a0 -; RV64IBT-NEXT: .LBB8_2: -; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: slt a1, a2, a1 +; RV64IBT-NEXT: cmov a0, a1, a3, a0 ; RV64IBT-NEXT: ret %tobool = icmp sle i64 %b, %c %cond = select i1 %tobool, i64 %d, i64 %a @@ -276,20 +246,14 @@ define i64 @cmov_sge_i64(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; ; RV64IB-LABEL: cmov_sge_i64: ; RV64IB: # %bb.0: -; RV64IB-NEXT: bge a1, a2, .LBB9_2 -; RV64IB-NEXT: # %bb.1: -; RV64IB-NEXT: mv a3, a0 -; RV64IB-NEXT: .LBB9_2: -; RV64IB-NEXT: mv a0, a3 +; RV64IB-NEXT: slt a1, a1, a2 +; RV64IB-NEXT: cmov a0, a1, a3, a0 ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: cmov_sge_i64: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: bge a1, a2, .LBB9_2 -; RV64IBT-NEXT: # %bb.1: -; RV64IBT-NEXT: mv a3, a0 -; RV64IBT-NEXT: .LBB9_2: -; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: slt a1, a1, a2 +; RV64IBT-NEXT: cmov a0, a1, a3, a0 ; RV64IBT-NEXT: ret %tobool = icmp sge i64 %b, %c %cond = select i1 %tobool, i64 %d, i64 %a @@ -308,20 +272,14 @@ define i64 @cmov_ule_i64(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; ; RV64IB-LABEL: cmov_ule_i64: ; RV64IB: # %bb.0: -; RV64IB-NEXT: bgeu a2, a1, .LBB10_2 -; RV64IB-NEXT: # %bb.1: -; RV64IB-NEXT: mv a3, a0 -; RV64IB-NEXT: .LBB10_2: -; RV64IB-NEXT: mv a0, a3 +; RV64IB-NEXT: sltu a1, a2, a1 +; RV64IB-NEXT: cmov a0, a1, a3, a0 ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: cmov_ule_i64: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: bgeu a2, a1, .LBB10_2 -; RV64IBT-NEXT: # %bb.1: -; RV64IBT-NEXT: mv a3, a0 -; RV64IBT-NEXT: .LBB10_2: -; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: sltu a1, a2, a1 +; RV64IBT-NEXT: cmov a0, a1, a3, a0 ; RV64IBT-NEXT: ret %tobool = icmp ule i64 %b, %c %cond = select i1 %tobool, i64 %d, i64 %a @@ -340,20 +298,14 @@ define i64 @cmov_uge_i64(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; ; RV64IB-LABEL: cmov_uge_i64: ; RV64IB: # %bb.0: -; RV64IB-NEXT: bgeu a1, a2, .LBB11_2 -; RV64IB-NEXT: # %bb.1: -; RV64IB-NEXT: mv a3, a0 -; RV64IB-NEXT: .LBB11_2: -; RV64IB-NEXT: mv a0, a3 
+; RV64IB-NEXT: sltu a1, a1, a2 +; RV64IB-NEXT: cmov a0, a1, a3, a0 ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: cmov_uge_i64: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: bgeu a1, a2, .LBB11_2 -; RV64IBT-NEXT: # %bb.1: -; RV64IBT-NEXT: mv a3, a0 -; RV64IBT-NEXT: .LBB11_2: -; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: sltu a1, a1, a2 +; RV64IBT-NEXT: cmov a0, a1, a3, a0 ; RV64IBT-NEXT: ret %tobool = icmp uge i64 %b, %c %cond = select i1 %tobool, i64 %d, i64 %a From 23390e7a131a67fd70e26692fc83f62860dd1095 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 12 Jan 2021 22:35:19 +0100 Subject: [PATCH 75/86] [InstCombine] Handle logical and/or in assume optimization assume(a && b) can be converted to assume(a); assume(b) even if the condition is logical. Same for assume(!(a || b)). --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 4 ++-- llvm/test/Transforms/InstCombine/assume.ll | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 8f2e694f3c5b81..7d63b30d35f8c8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1478,14 +1478,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { FunctionType *AssumeIntrinsicTy = II->getFunctionType(); Value *AssumeIntrinsic = II->getCalledOperand(); Value *A, *B; - if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { + if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) { Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles, II->getName()); Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName()); return eraseInstFromFunction(*II); } // assume(!(a || b)) -> assume(!a); assume(!b); - if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) { + if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) { Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, 
Builder.CreateNot(A), OpBundles, II->getName()); Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index f46ffbec2ce669..b107af16bdc18b 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S -instcombine-infinite-loop-threshold=2 | FileCheck %s +; RUN: opt < %s -instcombine -S -instcombine-infinite-loop-threshold=2 -instcombine-unsafe-select-transform=0 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -627,7 +627,7 @@ define i32 @unreachable_assume_logical(i32 %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 -; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: [[OR:%.*]] = select i1 [[CMP0]], i1 true, i1 [[CMP1]] ; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 ; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] @@ -704,7 +704,7 @@ define i32 @unreachable_assumes_and_store_logical(i32 %x, i32 %y, i32* %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 -; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: [[OR:%.*]] = select i1 [[CMP0]], i1 true, i1 [[CMP1]] ; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 ; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] From 7fd18508134112edb93852c16923a74bfff99cd2 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 12 Jan 2021 12:07:12 +0100 Subject: [PATCH 76/86] [mlir] Update LLVM dialect type documentation Recent commits reconfigured 
LLVM dialect types to use built-in types whenever possible. Update the documentation accordingly. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D94485 --- mlir/docs/Dialects/LLVM.md | 174 ++++++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 69 deletions(-) diff --git a/mlir/docs/Dialects/LLVM.md b/mlir/docs/Dialects/LLVM.md index d232ffab148c4a..1b85091b075602 100644 --- a/mlir/docs/Dialects/LLVM.md +++ b/mlir/docs/Dialects/LLVM.md @@ -130,9 +130,9 @@ Examples: %3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : vector<4xf32> ``` -Note that constants use built-in types within the initializer definition: MLIR -attributes are typed and the attributes used for constants require a built-in -type. +Note that constants list the type twice. This is an artifact of the LLVM dialect +not using built-in types, which are used for typed MLIR attributes. The syntax +will be reevaluated after considering composite constants. ### Globals @@ -186,33 +186,47 @@ attribute. ## Types -LLVM dialect defines a set of types that correspond to LLVM IR types. The -dialect type system is _closed_: types from other dialects are not allowed -within LLVM dialect aggregate types. This property allows for more concise -custom syntax and ensures easy translation to LLVM IR. - -Similarly to other MLIR context-owned objects, the creation and manipulation of -LLVM dialect types is thread-safe. +LLVM dialect uses built-in types whenever possible and defines a set of +complementary types, which correspond to the LLVM IR types that cannot be +directly represented with built-in types. Similarly to other MLIR context-owned +objects, the creation and manipulation of LLVM dialect types is thread-safe. MLIR does not support module-scoped named type declarations, e.g. `%s = type {i32, i32}` in LLVM IR. Instead, types must be fully specified at each use, except for recursive types where only the first reference to a named type needs -to be fully specified. 
MLIR type aliases are supported for top-level types, i.e. -they cannot be used inside the type due to type system closedness. +to be fully specified. MLIR [type aliases](LangRef.md#type-aliases) can be used +to achieve more compact syntax. The general syntax of LLVM dialect types is `!llvm.`, followed by a type kind identifier (e.g., `ptr` for pointer or `struct` for structure) and by an optional list of type parameters in angle brackets. The dialect follows MLIR style for types with nested angle brackets and keyword specifiers rather than -using different bracket styles to differentiate types. Inside angle brackets, -the `!llvm` prefix is omitted for brevity; thanks to closedness of the type -system, all types are assumed to be defined in the LLVM dialect. For example, -`!llvm.ptr>` is a pointer to a packed structure type -containing an 8-bit and a 32-bit integer. +using different bracket styles to differentiate types. Types inside the angle +brackets may omit the `!llvm.` prefix for brevity: the parser first attempts to +find a type (starting with `!` or a built-in type) and falls back to accepting a +keyword. For example, `!llvm.ptr>` and `!llvm.ptr>` are +equivalent, with the latter being the canonical form, and denote a pointer to a +pointer to a 32-bit integer. + +### Built-in Type Compatibility + +LLVM dialect accepts a subset of built-in types that are referred to as _LLVM +dialect-compatible types_. The following types are compatible: -### Simple Types +- Signless integers - `iN` (`IntegerType`). +- Floating point types - `bfloat`, `half`, `float`, `double` (`FloatType`). +- 1D vectors of signless integers or floating point types - `vector` + (`VectorType`). -The following non-parametric types are supported. +Note that only a subset of types that can be represented by a given class is +compatible. For example, signed and unsigned integers are not compatible. 
LLVM +provides a function, `bool LLVM::isCompatibleType(Type)`, that can be used as a +compatibility check. + +### Additional Simple Types + +The following non-parametric types derived from the LLVM IR are available in the +LLVM dialect: - `!llvm.fp128` (`LLVMFP128Type`) - 128-bit floating-point value as per IEEE-754-2008. @@ -231,19 +245,10 @@ The following non-parametric types are supported. These types represent a single value (or an absence thereof in case of `void`) and correspond to their LLVM IR counterparts. -### Parametric Types - -#### Integer Types +### Additional Parametric Types -Integer types are parametric in MLIR terminology, with their bitwidth being a -type parameter. They are expressed as follows: - -``` - llvm-int-type ::= `!llvm.i` integer-literal -``` - -and represented internally as `LLVMIntegerType`. For example, `i1` is a 1-bit -integer type (bool) and `i32` as a 32-bit integer type. +These types are parameterized by the types they contain, e.g., the pointee or +the element type, which can be either compatible built-in or LLVM dialect types. #### Pointer Types @@ -255,46 +260,26 @@ reconsidered if MLIR implements named address spaces. Their syntax is as follows: ``` - llvm-ptr-type ::= `!llvm.ptr<` llvm-type (`,` integer-literal)? `>` + llvm-ptr-type ::= `!llvm.ptr<` type (`,` integer-literal)? `>` ``` where the optional integer literal corresponds to the memory space. Both cases are represented by `LLVMPointerType` internally. -#### Vector Types - -Vector types represent sequences of elements, typically when multiple data -elements are processed by a single instruction (SIMD). Vectors are thought of as -stored in registers and therefore vector elements can only be addressed through -constant indices. - -Vector types are parameterized by the size, which may be either _fixed_ or a -multiple of some fixed size in case of _scalable_ vectors, and the element type. -Vectors cannot be nested and only 1D vectors are supported. 
Scalable vectors are -still considered 1D. Their syntax is as follows: - -``` - llvm-vec-type ::= `vector<` (`?` `x`)? integer-literal `x` llvm-type `>` -``` - -Internally, fixed vector types are represented as `LLVMFixedVectorType` and -scalable vector types are represented as `LLVMScalableVectorType`. Both classes -derive`LLVMVectorType`. - #### Array Types -Array types represent sequences of elements in memory. Unlike vectors, array -elements can be addressed with a value unknown at compile time, and can be -nested. Only 1D arrays are allowed though. +Array types represent sequences of elements in memory. Array elements can be +addressed with a value unknown at compile time, and can be nested. Only 1D +arrays are allowed though. Array types are parameterized by the fixed size and the element type. -Syntactically, their representation is close to vectors: +Syntactically, their representation is the following: ``` - llvm-array-type ::= `!llvm.array<` integer-literal `x` llvm-type `>` + llvm-array-type ::= `!llvm.array<` integer-literal `x` type `>` ``` -and are internally represented as `LLVMArrayType`. +and they are internally represented as `LLVMArrayType`. #### Function Types @@ -306,15 +291,14 @@ functions (`LLVMFunctionType`) always have single result, which may be `!llvm.void` if the function does not return anything. The syntax is as follows: ``` - llvm-func-type ::= `!llvm.func<` llvm-type `(` llvm-type-list (`,` `...`)? - `)` `>` + llvm-func-type ::= `!llvm.func<` type `(` type-list (`,` `...`)? `)` `>` ``` For example, ```mlir -!llvm.func // a function with no arguments; -!llvm.func // a function with two arguments and a result; +!llvm.func // a function with no arguments; +!llvm.func // a function with two arguments and a result; !llvm.func // a variadic function with at least one argument. ``` @@ -322,6 +306,59 @@ In the LLVM dialect, functions are not first-class objects and one cannot have a value of function type. 
Instead, one can take the address of a function and operate on pointers to functions. +### Vector Types + +Vector types represent sequences of elements, typically when multiple data +elements are processed by a single instruction (SIMD). Vectors are thought of as +stored in registers and therefore vector elements can only be addressed through +constant indices. + +Vector types are parameterized by the size, which may be either _fixed_ or a +multiple of some fixed size in case of _scalable_ vectors, and the element type. +Vectors cannot be nested and only 1D vectors are supported. Scalable vectors are +still considered 1D. + +LLVM dialect uses built-in vector types for _fixed_-size vectors of built-in +types, and provides additional types for fixed-sized vectors of LLVM dialect +types (`LLVMFixedVectorType`) and scalable vectors of any types +(`LLVMScalableVectorType`). These two additional types share the following +syntax: + +``` + llvm-vec-type ::= `!llvm.vec<` (`?` `x`)? integer-literal `x` type `>` +``` + +Note that the sets of element types supported by built-in and LLVM dialect +vector types are mutually exclusive, e.g., the built-in vector type does not +accept `!llvm.ptr` and the LLVM dialect fixed-width vector type does not +accept `i32`. 
+ +The following functions are provided to operate on any kind of the vector types +compatible with the LLVM dialect: + +- `bool LLVM::isCompatibleVectorType(Type)` - checks whether a type is a + vector type compatible with the LLVM dialect; +- `Type LLVM::getVectorElementType(Type)` - returns the element type of any + vector type compatible with the LLVM dialect; +- `llvm::ElementCount LLVM::getVectorNumElements(Type)` - returns the number + of elements in any vector type compatible with the LLVM dialect; +- `Type LLVM::getFixedVectorType(Type, unsigned)` - gets a fixed vector type + with the given element type and size; the resulting type is either a + built-in or an LLVM dialect vector type depending on which one supports the + given element type. + +#### Examples of Compatible Vector Types + +```mlir +vector<42 x i32> // Vector of 42 32-bit integers. +!llvm.vec<42 x ptr> // Vector of 42 pointers to 32-bit integers. +!llvm.vec // Scalable vector of 32-bit integers with + // size divisible by 4. +!llvm.array<2 x vector<2 x i32>> // Array of 2 vectors of 2 32-bit integers. +!llvm.array<2 x vec<2 x ptr>> // Array of 2 vectors of 2 pointers to 32-bit + // integers. +``` + ### Structure Types The structure type is used to represent a collection of data members together in @@ -362,10 +399,10 @@ The syntax for identified structure types is as follows. ``` llvm-ident-struct-type ::= `!llvm.struct<` string-literal, `opaque` `>` | `!llvm.struct<` string-literal, `packed`? - `(` llvm-type-or-ref-list `)` `>` -llvm-type-or-ref-list ::= -llvm-type-or-ref ::= - | `!llvm.struct<` string-literal > + `(` type-or-ref-list `)` `>` +type-or-ref-list ::= +type-or-ref ::= + | `!llvm.`? `struct<` string-literal `>` ``` The body of the identified struct is printed in full unless the it is @@ -389,9 +426,8 @@ Literal structures are uniqued according to the list of elements they contain, and can optionally be packed. The syntax for such structs is as follows. 
``` -llvm-literal-struct-type ::= `!llvm.struct<` `packed`? `(` llvm-type-list `)` - `>` -llvm-type-list ::= +llvm-literal-struct-type ::= `!llvm.struct<` `packed`? `(` type-list `)` `>` +type-list ::= ``` Literal structs cannot be recursive, but can contain other structs. Therefore, From 2a49b7c64a33566cf5db1a5b4042d6037ccc7cf5 Mon Sep 17 00:00:00 2001 From: modimo Date: Tue, 12 Jan 2021 13:19:30 -0800 Subject: [PATCH 77/86] [Inliner] Change inline remark format and update ReplayInlineAdvisor to use it This change modifies the source location formatting from: LineNumber.Discriminator to: LineNumber:ColumnNumber.Discriminator The motivation here is to enhance location information for inline replay that currently exists for the SampleProfile inliner. This will be leveraged further in inline replay for the CGSCC inliner in the related diff. The ReplayInlineAdvisor is also modified to read the new format and now takes into account the callee for greater accuracy. Testing: ninja check-llvm Reviewed By: mtrofin Differential Revision: https://reviews.llvm.org/D94333 --- .../optimization-remark-with-hotness-new-pm.c | 2 +- .../optimization-remark-with-hotness.c | 2 +- llvm/include/llvm/Analysis/InlineAdvisor.h | 19 ++++++ .../llvm/Analysis/ReplayInlineAdvisor.h | 3 +- llvm/lib/Analysis/InlineAdvisor.cpp | 60 ++++++++----------- llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 41 +++++++++---- llvm/lib/Transforms/IPO/SampleProfile.cpp | 2 +- .../optimization-remarks-passed-yaml.ll | 5 +- .../SampleProfile/Inputs/inline-replay.txt | 4 +- .../Transforms/SampleProfile/inline-replay.ll | 2 +- .../SampleProfile/remarks-hotness.ll | 3 +- llvm/test/Transforms/SampleProfile/remarks.ll | 12 +++- 12 files changed, 98 insertions(+), 57 deletions(-) diff --git a/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c b/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c index 76802bfdcdb893..5e4641d92313a8 100644 --- 
a/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c +++ b/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c @@ -73,7 +73,7 @@ void bar(int x) { // THRESHOLD-NOT: hotness // NO_PGO: '-fdiagnostics-show-hotness' requires profile-guided optimization information // NO_PGO: '-fdiagnostics-hotness-threshold=' requires profile-guided optimization information - // expected-remark@+1 {{foo inlined into bar with (cost=always): always inline attribute at callsite bar:8 (hotness:}} + // expected-remark@+1 {{foo inlined into bar with (cost=always): always inline attribute at callsite bar:8:10; (hotness:}} sum += foo(x, x - 2); } diff --git a/clang/test/Frontend/optimization-remark-with-hotness.c b/clang/test/Frontend/optimization-remark-with-hotness.c index 96be3524db1629..0961e6da11f41f 100644 --- a/clang/test/Frontend/optimization-remark-with-hotness.c +++ b/clang/test/Frontend/optimization-remark-with-hotness.c @@ -66,7 +66,7 @@ void bar(int x) { // THRESHOLD-NOT: hotness // NO_PGO: '-fdiagnostics-show-hotness' requires profile-guided optimization information // NO_PGO: '-fdiagnostics-hotness-threshold=' requires profile-guided optimization information - // expected-remark@+1 {{foo inlined into bar with (cost=always): always inliner at callsite bar:8 (hotness:}} + // expected-remark@+1 {{foo inlined into bar with (cost=always): always inliner at callsite bar:8:10; (hotness:}} sum += foo(x, x - 2); } diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index f051706dca16c5..2967aa9116996c 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -121,6 +121,25 @@ class InlineAdvice { bool Recorded = false; }; +class DefaultInlineAdvice : public InlineAdvice { +public: + DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB, + Optional OIC, OptimizationRemarkEmitter &ORE, + bool EmitRemarks = true) + : InlineAdvice(Advisor, CB, ORE, OIC.hasValue()), 
OriginalCB(&CB), + OIC(OIC), EmitRemarks(EmitRemarks) {} + +private: + void recordUnsuccessfulInliningImpl(const InlineResult &Result) override; + void recordInliningWithCalleeDeletedImpl() override; + void recordInliningImpl() override; + +private: + CallBase *const OriginalCB; + Optional OIC; + bool EmitRemarks; +}; + /// Interface for deciding whether to inline a call site or not. class InlineAdvisor { public: diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h index 19e9079a20f589..daed84603541c8 100644 --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -25,13 +25,14 @@ class OptimizationRemarkEmitter; class ReplayInlineAdvisor : public InlineAdvisor { public: ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context, - StringRef RemarksFile); + StringRef RemarksFile, bool EmitRemarks); std::unique_ptr getAdvice(CallBase &CB) override; bool areReplayRemarksLoaded() const { return HasReplayRemarks; } private: StringSet<> InlineSitesFromRemarks; bool HasReplayRemarks = false; + bool EmitRemarks = false; }; } // namespace llvm #endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index 4be17ae9255bb0..c427230404e619 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -48,41 +48,28 @@ static cl::opt cl::desc("Scale to limit the cost of inline deferral"), cl::init(2), cl::Hidden); -namespace { -class DefaultInlineAdvice : public InlineAdvice { -public: - DefaultInlineAdvice(DefaultInlineAdvisor *Advisor, CallBase &CB, - Optional OIC, OptimizationRemarkEmitter &ORE) - : InlineAdvice(Advisor, CB, ORE, OIC.hasValue()), OriginalCB(&CB), - OIC(OIC) {} - -private: - void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { - using namespace ore; - llvm::setInlineRemark(*OriginalCB, 
std::string(Result.getFailureReason()) + - "; " + inlineCostStr(*OIC)); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) - << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", Caller) << ": " - << NV("Reason", Result.getFailureReason()); - }); - } +void DefaultInlineAdvice::recordUnsuccessfulInliningImpl( + const InlineResult &Result) { + using namespace ore; + llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) + + "; " + inlineCostStr(*OIC)); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller) << ": " + << NV("Reason", Result.getFailureReason()); + }); +} - void recordInliningWithCalleeDeletedImpl() override { +void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() { + if (EmitRemarks) emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); - } +} - void recordInliningImpl() override { +void DefaultInlineAdvice::recordInliningImpl() { + if (EmitRemarks) emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); - } - -private: - CallBase *const OriginalCB; - Optional OIC; -}; - -} // namespace +} llvm::Optional static getDefaultInlineAdvice( CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params) { @@ -389,10 +376,10 @@ std::string llvm::getCallSiteLocation(DebugLoc DLoc) { StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); if (Name.empty()) Name = DIL->getScope()->getSubprogram()->getName(); - CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset); - if (Discriminator) { + CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset) << ":" + << llvm::utostr(DIL->getColumn()); + if (Discriminator) CallSiteLoc << "." 
<< llvm::utostr(Discriminator); - } First = false; } @@ -415,11 +402,14 @@ void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); if (Name.empty()) Name = DIL->getScope()->getSubprogram()->getName(); - Remark << Name << ":" << ore::NV("Line", Offset); + Remark << Name << ":" << ore::NV("Line", Offset) << ":" + << ore::NV("Column", DIL->getColumn()); if (Discriminator) Remark << "." << ore::NV("Disc", Discriminator); First = false; } + + Remark << ";"; } void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp index d2dd0301630cd1..d6803fac20b753 100644 --- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -6,8 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file implements ReplayInlineAdvisor that replays inline decision based -// on previous inline remarks from optimization remark log. +// This file implements ReplayInlineAdvisor that replays inline decisions based +// on previous inline remarks from optimization remark log. This is a best +// effort approach useful for testing compiler/source changes while holding +// inlining steady. 
// //===----------------------------------------------------------------------===// @@ -22,8 +24,9 @@ using namespace llvm; ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context, - StringRef RemarksFile) - : InlineAdvisor(FAM), HasReplayRemarks(false) { + StringRef RemarksFile, + bool EmitRemarks) + : InlineAdvisor(FAM), HasReplayRemarks(false), EmitRemarks(EmitRemarks) { auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); std::error_code EC = BufferOrErr.getError(); if (EC) { @@ -32,16 +35,24 @@ ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM, } // Example for inline remarks to parse: - // _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1 + // main:3:1.1: _Z3subii inlined into main at callsite sum:1 @ main:3:1.1 // We use the callsite string after `at callsite` to replay inlining. line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); for (; !LineIt.is_at_eof(); ++LineIt) { StringRef Line = *LineIt; auto Pair = Line.split(" at callsite "); - if (Pair.second.empty()) + + auto Callee = Pair.first.split(" inlined into").first.rsplit(": ").second; + + auto CallSite = Pair.second.split(";").first; + + if (Callee.empty() || CallSite.empty()) continue; - InlineSitesFromRemarks.insert(Pair.second); + + std::string Combined = (Callee + CallSite).str(); + InlineSitesFromRemarks.insert(Combined); } + HasReplayRemarks = true; } @@ -52,9 +63,19 @@ std::unique_ptr ReplayInlineAdvisor::getAdvice(CallBase &CB) { auto &ORE = FAM.getResult(Caller); if (InlineSitesFromRemarks.empty()) - return std::make_unique(this, CB, ORE, false); + return std::make_unique(this, CB, None, ORE, + EmitRemarks); std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); - bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0; - return std::make_unique(this, CB, ORE, InlineRecommended); + StringRef Callee = CB.getCalledFunction()->getName(); + std::string Combined = (Callee + 
CallSiteLoc).str(); + auto Iter = InlineSitesFromRemarks.find(Combined); + + Optional InlineRecommended = None; + if (Iter != InlineSitesFromRemarks.end()) { + InlineRecommended = llvm::InlineCost::getAlways("found in replay"); + } + + return std::make_unique(this, CB, InlineRecommended, ORE, + EmitRemarks); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 17307dc3ae60a4..02009c45c9de76 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1960,7 +1960,7 @@ bool SampleProfileLoader::doInitialization(Module &M, if (FAM && !ProfileInlineReplayFile.empty()) { ExternalInlineAdvisor = std::make_unique( - *FAM, Ctx, ProfileInlineReplayFile); + *FAM, Ctx, ProfileInlineReplayFile, /*EmitRemarks=*/false); if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) ExternalInlineAdvisor.reset(); } diff --git a/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll b/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll index b29dd11c02ed3d..12250b463a0975 100644 --- a/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll +++ b/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll @@ -22,7 +22,7 @@ ; 4 return foo(); ; 5 } -; CHECK: remark: /tmp/s.c:4:10: foo inlined into bar with (cost={{[0-9\-]+}}, threshold={{[0-9]+}}) at callsite bar:1 (hotness: 30) +; CHECK: remark: /tmp/s.c:4:10: foo inlined into bar with (cost={{[0-9\-]+}}, threshold={{[0-9]+}}) at callsite bar:1:10; (hotness: 30) ; YAML: --- !Passed ; YAML-NEXT: Pass: inline @@ -46,6 +46,9 @@ ; YAML-NEXT: - String: bar ; YAML-NEXT: - String: ':' ; YAML-NEXT: - Line: '1' +; YAML-NEXT: - String: ':' +; YAML-NEXT: - Column: '10' +; YAML-NEXT: - String: ';' ; YAML-NEXT: ... 
; ModuleID = '/tmp/s.c' diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt index 6842845d565545..ae920515bf5a53 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt @@ -1,2 +1,2 @@ -remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1 -remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ main:3.1 +remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3:0.1; +remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1:0 @ main:3:0.1; diff --git a/llvm/test/Transforms/SampleProfile/inline-replay.ll b/llvm/test/Transforms/SampleProfile/inline-replay.ll index a3c4cff73eb1b5..976e85d160bd34 100644 --- a/llvm/test/Transforms/SampleProfile/inline-replay.ll +++ b/llvm/test/Transforms/SampleProfile/inline-replay.ll @@ -119,4 +119,4 @@ attributes #0 = { "use-sample-profile" } ; REPLAY: _Z3sumii inlined into main ; REPLAY: _Z3subii inlined into main -; REPLA-NOT: _Z3subii inlined into _Z3sumii +; REPLAY-NOT: _Z3subii inlined into _Z3sumii diff --git a/llvm/test/Transforms/SampleProfile/remarks-hotness.ll b/llvm/test/Transforms/SampleProfile/remarks-hotness.ll index 4ab07cd0483861..c1f1fd70051ca3 100644 --- a/llvm/test/Transforms/SampleProfile/remarks-hotness.ll +++ b/llvm/test/Transforms/SampleProfile/remarks-hotness.ll @@ -36,7 +36,7 @@ ; YAML-MISS-NEXT: Function: _Z7caller2v ; YAML-MISS-NEXT: Hotness: 2 -; CHECK-RPASS: _Z7callee1v inlined into _Z7caller1v with (cost=-30, threshold=4500) at callsite _Z7caller1v:1 (hotness: 401) +; CHECK-RPASS: _Z7callee1v inlined into _Z7caller1v with (cost=-30, threshold=4500) at 
callsite _Z7caller1v:1:10; (hotness: 401) ; CHECK-RPASS-NOT: _Z7callee2v not inlined into _Z7caller2v because it should never be inlined (cost=never): noinline function attribute (hotness: 2) ; ModuleID = 'remarks-hotness.cpp' @@ -93,4 +93,3 @@ attributes #1 = { noinline nounwind uwtable "use-sample-profile" } !17 = distinct !DISubprogram(name: "caller2", linkageName: "_Z7caller2v", scope: !1, file: !1, line: 13, type: !8, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) !18 = !DILocation(line: 14, column: 10, scope: !17) !19 = !DILocation(line: 14, column: 3, scope: !17) - diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll index 1b6201a8e2f20c..3add1e74abaa98 100644 --- a/llvm/test/Transforms/SampleProfile/remarks.ll +++ b/llvm/test/Transforms/SampleProfile/remarks.ll @@ -21,8 +21,8 @@ ; We are expecting foo() to be inlined in main() (almost all the cycles are ; spent inside foo). -; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0 -; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6 @ main:0 +; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0:21; +; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21; ; The back edge for the loop is the hottest edge in the loop subgraph. ; CHECK: remark: remarks.cc:6:9: most popular destination for conditional branches at remarks.cc:5:3 @@ -53,6 +53,9 @@ ;YAML-NEXT: - String: main ;YAML-NEXT: - String: ':' ;YAML-NEXT: - Line: '0' +;YAML-NEXT: - String: ':' +;YAML-NEXT: - Column: '21' +;YAML-NEXT: - String: ';' ;YAML-NEXT: ... 
;YAML: --- !Passed ;YAML-NEXT: Pass: sample-profile-inline @@ -74,10 +77,15 @@ ;YAML-NEXT: - String: _Z3foov ;YAML-NEXT: - String: ':' ;YAML-NEXT: - Line: '6' +;YAML-NEXT: - String: ':' +;YAML-NEXT: - Column: '19' ;YAML-NEXT: - String: ' @ ' ;YAML-NEXT: - String: main ;YAML-NEXT: - String: ':' ;YAML-NEXT: - Line: '0' +;YAML-NEXT: - String: ':' +;YAML-NEXT: - Column: '21' +;YAML-NEXT: - String: ';' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile ;YAML-NEXT: Name: AppliedSamples From 68ff52ffead2ba25cca442778ab19286000daad7 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 12 Jan 2021 16:48:19 -0500 Subject: [PATCH 78/86] [OpenMP] Fixed the link error that cannot find static data member Constant static data member can be defined in the class without another define after the class in C++17. Although it is C++17, Clang can still handle it even w/o the flag for C++17. Unluckily, GCC cannot handle that. Reviewed By: jhuber6 Differential Revision: https://reviews.llvm.org/D94541 --- .../plugins/common/MemoryManager/MemoryManager.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h b/openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h index 1f9cbeb00394c3..6e00728a658f71 100644 --- a/openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h +++ b/openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h @@ -338,4 +338,9 @@ class MemoryManagerTy { } }; +// GCC still cannot handle the static data member like Clang so we still need +// this part. 
+constexpr const size_t MemoryManagerTy::BucketSize[]; +constexpr const int MemoryManagerTy::NumBuckets; + #endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_MEMORYMANAGER_MEMORYMANAGER_H From d1fa7afc7aefd822698fe86431d8184b1e8b6683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 11 Dec 2020 12:42:07 +0200 Subject: [PATCH 79/86] [AArch64] [Windows] Properly add :lo12: reloc specifiers when generating assembly This makes sure that assembly output actually can be assembled. Set the correct MCExpr relocations specifier VK_PAGEOFF - and also set VK_PAGE consistently even though it's not visible in the assembly output. Differential Revision: https://reviews.llvm.org/D94365 --- llvm/lib/Target/AArch64/AArch64MCInstLower.cpp | 6 ++++++ llvm/test/CodeGen/AArch64/cfguard-checks.ll | 14 +++++++------- llvm/test/CodeGen/AArch64/dllimport.ll | 12 ++++++------ llvm/test/CodeGen/AArch64/mingw-refptr.ll | 14 +++++++------- .../test/CodeGen/AArch64/stack-protector-target.ll | 2 +- llvm/test/CodeGen/AArch64/win-tls.ll | 6 +++--- llvm/test/CodeGen/AArch64/win_cst_pool.ll | 4 ++-- llvm/test/CodeGen/AArch64/windows-extern-weak.ll | 2 +- 8 files changed, 33 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp index afd5ae6bcbf2cc..10e191ff44cfcb 100644 --- a/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -203,6 +203,12 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_SABS; } else { RefFlags |= AArch64MCExpr::VK_ABS; + + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) + RefFlags |= AArch64MCExpr::VK_PAGE; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) + RefFlags |= AArch64MCExpr::VK_PAGEOFF | AArch64MCExpr::VK_NC; } if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3) diff 
--git a/llvm/test/CodeGen/AArch64/cfguard-checks.ll b/llvm/test/CodeGen/AArch64/cfguard-checks.ll index cb31decd57ba99..66ec4b6ed07426 100644 --- a/llvm/test/CodeGen/AArch64/cfguard-checks.ll +++ b/llvm/test/CodeGen/AArch64/cfguard-checks.ll @@ -18,7 +18,7 @@ entry: ; CHECK-LABEL: func_guard_nocf ; CHECK: adrp x8, target_func - ; CHECK: add x8, x8, target_func + ; CHECK: add x8, x8, :lo12:target_func ; CHECK-NOT: __guard_check_icall_fptr ; CHECK: blr x8 } @@ -37,9 +37,9 @@ entry: ; The call to __guard_check_icall_fptr should come immediately before the call to the target function. ; CHECK-LABEL: func_optnone_cf ; CHECK: adrp x8, target_func - ; CHECK: add x8, x8, target_func + ; CHECK: add x8, x8, :lo12:target_func ; CHECK: adrp x9, __guard_check_icall_fptr - ; CHECK: add x9, x9, __guard_check_icall_fptr + ; CHECK: add x9, x9, :lo12:__guard_check_icall_fptr ; CHECK: ldr x9, [x9] ; CHECK: mov x15, x8 ; CHECK: blr x9 @@ -60,9 +60,9 @@ entry: ; The call to __guard_check_icall_fptr should come immediately before the call to the target function. ; CHECK-LABEL: func_cf ; CHECK: adrp x8, __guard_check_icall_fptr - ; CHECK: ldr x9, [x8, __guard_check_icall_fptr] + ; CHECK: ldr x9, [x8, :lo12:__guard_check_icall_fptr] ; CHECK: adrp x8, target_func - ; CHECK: add x8, x8, target_func + ; CHECK: add x8, x8, :lo12:target_func ; CHECK: mov x15, x8 ; CHECK: blr x9 ; CHECK-NEXT: blr x8 @@ -89,9 +89,9 @@ lpad: ; preds = %entry ; The call to __guard_check_icall_fptr should come immediately before the call to the target function. 
; CHECK-LABEL: func_cf_invoke ; CHECK: adrp x8, __guard_check_icall_fptr - ; CHECK: ldr x9, [x8, __guard_check_icall_fptr] + ; CHECK: ldr x9, [x8, :lo12:__guard_check_icall_fptr] ; CHECK: adrp x8, target_func - ; CHECK: add x8, x8, target_func + ; CHECK: add x8, x8, :lo12:target_func ; CHECK: mov x15, x8 ; CHECK: blr x9 ; CHECK-NEXT: .Ltmp0: diff --git a/llvm/test/CodeGen/AArch64/dllimport.ll b/llvm/test/CodeGen/AArch64/dllimport.ll index d72e987aec2d9f..ed90c805c53b20 100644 --- a/llvm/test/CodeGen/AArch64/dllimport.ll +++ b/llvm/test/CodeGen/AArch64/dllimport.ll @@ -14,7 +14,7 @@ define i32 @get_var() { ; CHECK-LABEL: get_var ; CHECK: adrp x8, __imp_var -; CHECK: ldr x8, [x8, __imp_var] +; CHECK: ldr x8, [x8, :lo12:__imp_var] ; CHECK: ldr w0, [x8] ; CHECK: ret @@ -25,10 +25,10 @@ define i32 @get_ext() { ; CHECK-LABEL: get_ext ; CHECK: adrp x8, ext -; DAG-ISEL: ldr w0, [x8, ext] -; FAST-ISEL: add x8, x8, ext +; DAG-ISEL: ldr w0, [x8, :lo12:ext] +; FAST-ISEL: add x8, x8, :lo12:ext ; FAST-ISEL: ldr w0, [x8] -; GLOBAL-ISEL-FALLBACK: ldr w0, [x8, ext] +; GLOBAL-ISEL-FALLBACK: ldr w0, [x8, :lo12:ext] ; CHECK: ret define i32* @get_var_pointer() { @@ -37,7 +37,7 @@ define i32* @get_var_pointer() { ; CHECK-LABEL: get_var_pointer ; CHECK: adrp [[REG1:x[0-9]+]], __imp_var -; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]], __imp_var] +; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]], :lo12:__imp_var] ; CHECK: ret define i32 @call_external() { @@ -47,7 +47,7 @@ define i32 @call_external() { ; CHECK-LABEL: call_external ; CHECK: adrp x0, __imp_external -; CHECK: ldr x0, [x0, __imp_external] +; CHECK: ldr x0, [x0, :lo12:__imp_external] ; CHECK: br x0 define i32 @call_internal() { diff --git a/llvm/test/CodeGen/AArch64/mingw-refptr.ll b/llvm/test/CodeGen/AArch64/mingw-refptr.ll index dde1b895257811..e68658eadeec16 100644 --- a/llvm/test/CodeGen/AArch64/mingw-refptr.ll +++ b/llvm/test/CodeGen/AArch64/mingw-refptr.ll @@ -11,7 +11,7 @@ define dso_local i32 @getVar() { ; CHECK-LABEL: getVar: ; 
CHECK: adrp x8, .refptr.var -; CHECK: ldr x8, [x8, .refptr.var] +; CHECK: ldr x8, [x8, :lo12:.refptr.var] ; CHECK: ldr w0, [x8] ; CHECK: ret entry: @@ -22,7 +22,7 @@ entry: define dso_local i32 @getDsoLocalVar() { ; CHECK-LABEL: getDsoLocalVar: ; CHECK: adrp x8, dsolocalvar -; CHECK: ldr w0, [x8, dsolocalvar] +; CHECK: ldr w0, [x8, :lo12:dsolocalvar] ; CHECK: ret entry: %0 = load i32, i32* @dsolocalvar, align 4 @@ -32,7 +32,7 @@ entry: define dso_local i32 @getLocalVar() { ; CHECK-LABEL: getLocalVar: ; CHECK: adrp x8, localvar -; CHECK: ldr w0, [x8, localvar] +; CHECK: ldr w0, [x8, :lo12:localvar] ; CHECK: ret entry: %0 = load i32, i32* @localvar, align 4 @@ -42,7 +42,7 @@ entry: define dso_local i32 @getLocalCommon() { ; CHECK-LABEL: getLocalCommon: ; CHECK: adrp x8, localcommon -; CHECK: ldr w0, [x8, localcommon] +; CHECK: ldr w0, [x8, :lo12:localcommon] ; CHECK: ret entry: %0 = load i32, i32* @localcommon, align 4 @@ -52,7 +52,7 @@ entry: define dso_local i32 @getExtVar() { ; CHECK-LABEL: getExtVar: ; CHECK: adrp x8, __imp_extvar -; CHECK: ldr x8, [x8, __imp_extvar] +; CHECK: ldr x8, [x8, :lo12:__imp_extvar] ; CHECK: ldr w0, [x8] ; CHECK: ret entry: @@ -74,11 +74,11 @@ declare dso_local void @otherFunc() define dso_local void @sspFunc() #0 { ; CHECK-LABEL: sspFunc: ; CHECK: adrp x8, .refptr.__stack_chk_guard -; CHECK: ldr x8, [x8, .refptr.__stack_chk_guard] +; CHECK: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] ; CHECK: ldr x8, [x8] ; GISEL-LABEL: sspFunc: ; GISEL: adrp x8, .refptr.__stack_chk_guard -; GISEL: ldr x8, [x8, .refptr.__stack_chk_guard] +; GISEL: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] ; GISEL: ldr x8, [x8] entry: %c = alloca i8, align 1 diff --git a/llvm/test/CodeGen/AArch64/stack-protector-target.ll b/llvm/test/CodeGen/AArch64/stack-protector-target.ll index 1d4b51bcd1cdfb..0c5905da81fca7 100644 --- a/llvm/test/CodeGen/AArch64/stack-protector-target.ll +++ b/llvm/test/CodeGen/AArch64/stack-protector-target.ll @@ -30,7 +30,7 @@ declare void 
@_Z7CapturePi(i32*) ; FUCHSIA-AARCH64-COMMON: cmp [[C]], [[D]] ; WINDOWS-AARCH64: adrp x8, __security_cookie -; WINDOWS-AARCH64: ldr x8, [x8, __security_cookie] +; WINDOWS-AARCH64: ldr x8, [x8, :lo12:__security_cookie] ; WINDOWS-AARCH64: str x8, [sp, #8] ; WINDOWS-AARCH64: bl _Z7CapturePi ; WINDOWS-AARCH64: ldr x0, [sp, #8] diff --git a/llvm/test/CodeGen/AArch64/win-tls.ll b/llvm/test/CodeGen/AArch64/win-tls.ll index ea49b99bbaae13..f83a1db391060d 100644 --- a/llvm/test/CodeGen/AArch64/win-tls.ll +++ b/llvm/test/CodeGen/AArch64/win-tls.ll @@ -30,7 +30,7 @@ define i64 @getVar64() { ; CHECK-LABEL: getVar ; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index -; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], _tls_index] +; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], :lo12:_tls_index] ; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88] ; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3] @@ -39,7 +39,7 @@ define i64 @getVar64() { ; CHECK-LABEL: getPtr ; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index -; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], _tls_index] +; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], :lo12:_tls_index] ; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88] ; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3] @@ -48,7 +48,7 @@ define i64 @getVar64() { ; CHECK-LABEL: setVar ; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index -; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], _tls_index] +; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], :lo12:_tls_index] ; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88] ; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3] diff --git a/llvm/test/CodeGen/AArch64/win_cst_pool.ll b/llvm/test/CodeGen/AArch64/win_cst_pool.ll index 5d9eed408d40f9..771118c8601ddb 100644 --- a/llvm/test/CodeGen/AArch64/win_cst_pool.ll +++ b/llvm/test/CodeGen/AArch64/win_cst_pool.ll @@ -11,7 +11,7 
@@ define double @double() { ; CHECK-NEXT: .xword 0x2000000000800001 ; CHECK: double: ; CHECK: adrp x8, __real@2000000000800001 -; CHECK-NEXT: ldr d0, [x8, __real@2000000000800001] +; CHECK-NEXT: ldr d0, [x8, :lo12:__real@2000000000800001] ; CHECK-NEXT: ret ; MINGW: .section .rdata,"dr" @@ -20,5 +20,5 @@ define double @double() { ; MINGW-NEXT: .xword 0x2000000000800001 ; MINGW: double: ; MINGW: adrp x8, [[LABEL]] -; MINGW-NEXT: ldr d0, [x8, [[LABEL]]] +; MINGW-NEXT: ldr d0, [x8, :lo12:[[LABEL]]] ; MINGW-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/windows-extern-weak.ll b/llvm/test/CodeGen/AArch64/windows-extern-weak.ll index 3b4f4eeda5ea15..18df2ddc5db42f 100644 --- a/llvm/test/CodeGen/AArch64/windows-extern-weak.ll +++ b/llvm/test/CodeGen/AArch64/windows-extern-weak.ll @@ -8,7 +8,7 @@ define void @func() { ; CHECK-NEXT: .seh_save_reg_x x30, 16 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: adrp x8, .refptr.weakfunc -; CHECK-NEXT: ldr x8, [x8, .refptr.weakfunc] +; CHECK-NEXT: ldr x8, [x8, :lo12:.refptr.weakfunc] ; CHECK-NEXT: cbz x8, .LBB0_2 ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: blr x8 From 02f1d28ed6b8f33445dae3beed8b6cc8dada4312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 17 Dec 2020 15:40:06 +0200 Subject: [PATCH 80/86] [libcxx] Avoid overflows in the windows __libcpp_steady_clock_now() As freq.QuadPart can be in the range of 10000000 to 19200000, the multiplication before division makes the calculation overflow and wrap to negative values every 16-30 minutes. Instead count the whole seconds separately before adding the scaled fractional seconds. Add a testcase for steady_clock to check that the values returned for now() compare as bigger than the zero time origin; this corresponds to a testcase in Qt [1] [2] (that failed spuriously due to this).
[1] https://bugreports.qt.io/browse/QTBUG-89539 [2] https://code.qt.io/cgit/qt/qtbase.git/tree/tests/auto/corelib/kernel/qdeadlinetimer/tst_qdeadlinetimer.cpp?id=f8de5e54022b8b7471131b7ad55c83b69b2684c0#n569 Differential Revision: https://reviews.llvm.org/D93456 --- libcxx/src/chrono.cpp | 5 ++++- .../utilities/time/time.clock/time.clock.steady/now.pass.cpp | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp index 1419cf2f74a850..5291d4fa8dc6fd 100644 --- a/libcxx/src/chrono.cpp +++ b/libcxx/src/chrono.cpp @@ -153,7 +153,10 @@ static steady_clock::time_point __libcpp_steady_clock_now() { LARGE_INTEGER counter; (void) QueryPerformanceCounter(&counter); - return steady_clock::time_point(steady_clock::duration(counter.QuadPart * nano::den / freq.QuadPart)); + auto seconds = counter.QuadPart / freq.QuadPart; + auto fractions = counter.QuadPart % freq.QuadPart; + auto dur = seconds * nano::den + fractions * nano::den / freq.QuadPart; + return steady_clock::time_point(steady_clock::duration(dur)); } #elif defined(CLOCK_MONOTONIC) diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.steady/now.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.steady/now.pass.cpp index 4b8104dd1a6f19..14dc9a9832dc4b 100644 --- a/libcxx/test/std/utilities/time/time.clock/time.clock.steady/now.pass.cpp +++ b/libcxx/test/std/utilities/time/time.clock/time.clock.steady/now.pass.cpp @@ -25,6 +25,8 @@ int main(int, char**) C::time_point t1 = C::now(); C::time_point t2 = C::now(); assert(t2 >= t1); + // make sure t2 didn't wrap around + assert(t2 > std::chrono::time_point()); return 0; } From 01f1273fe2f0c246f17162de24a8b6e11bad23a8 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 12 Jan 2021 17:00:49 -0500 Subject: [PATCH 81/86] [OpenMP] Fixed a typo in openmp/CMakeLists.txt --- openmp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/CMakeLists.txt 
b/openmp/CMakeLists.txt index 12e8d542f9f6f8..67600bebdafbc6 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -73,7 +73,7 @@ if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS) if (LLVM_MAIN_INCLUDE_DIR) list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LLVM_MAIN_INCLUDE_DIR}) elseif (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include) - list(APPENDset LIBOMPTARGET_LLVM_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include) + list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include) endif() endif() From 3d397091591fca4aa16153bba22f031218bee47d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 6 Jan 2021 14:04:19 -0500 Subject: [PATCH 82/86] AMDGPU: Remove wrapper only call limitation This seems to only have overridden cold handling, which we probably shouldn't do. As far as I can tell the wrapper library functions are still inlined as appropriate. --- llvm/lib/Target/AMDGPU/AMDGPUInline.cpp | 23 ----------------------- llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll | 4 ++-- 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp index 3b96a6a85879f5..4e689b392802e7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -145,26 +145,6 @@ unsigned AMDGPUInliner::getInlineThreshold(CallBase &CB) const { return (unsigned)Thres; } -// Check if call is just a wrapper around another call. -// In this case we only have call and ret instructions. 
-static bool isWrapperOnlyCall(CallBase &CB) { - Function *Callee = CB.getCalledFunction(); - if (!Callee || Callee->size() != 1) - return false; - const BasicBlock &BB = Callee->getEntryBlock(); - if (const Instruction *I = BB.getFirstNonPHI()) { - if (!isa(I)) { - return false; - } - if (isa(*std::next(I->getIterator()))) { - LLVM_DEBUG(dbgs() << " Wrapper only call detected: " - << Callee->getName() << '\n'); - return true; - } - } - return false; -} - InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) { Function *Callee = CB.getCalledFunction(); Function *Caller = CB.getCaller(); @@ -186,9 +166,6 @@ InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) { return llvm::InlineCost::getNever(IsViable.getFailureReason()); } - if (isWrapperOnlyCall(CB)) - return llvm::InlineCost::getAlways("wrapper-only call"); - InlineParams LocalParams = Params; LocalParams.DefaultThreshold = (int)getInlineThreshold(CB); bool RemarksEnabled = false; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll index 243522e28dd73c..dd06fc17e8edad 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll @@ -40,7 +40,7 @@ if.end: ret void } -define coldcc float @sin_wrapper(float %x) { +define float @sin_wrapper(float %x) { bb: %call = tail call float @_Z3sinf(float %x) ret float %call @@ -83,7 +83,7 @@ entry: %and = and i32 %tid, %n %arrayidx11 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %and %tmp12 = load float, float addrspace(5)* %arrayidx11, align 4 - %c2 = call coldcc float @sin_wrapper(float %tmp12) + %c2 = call float @sin_wrapper(float %tmp12) store float %c2, float addrspace(5)* %arrayidx7, align 4 %xor = xor i32 %tid, %n %arrayidx16 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %xor From cf45731f0eaead79e1ac501b397e330df41ec152 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 12 Jan 2021 14:19:55 
-0800 Subject: [PATCH 83/86] [Driver] Fix assertion failure when -fprofile-generate -fcs-profile-generate are used together If conflicting `-fprofile-generate -fcs-profile-generate` are used together, there is currently an assertion failure. Fix the failure. Also add some driver tests. Reviewed By: xur Differential Revision: https://reviews.llvm.org/D94463 --- clang/lib/Driver/ToolChains/Clang.cpp | 4 +++- clang/test/Driver/fcs-profile-generate.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/fcs-profile-generate.c diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 4a20936ddda12c..c03b513150b3ff 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -766,9 +766,11 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, D.Diag(diag::err_drv_argument_not_allowed_with) << ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling(); - if (CSPGOGenerateArg && PGOGenerateArg) + if (CSPGOGenerateArg && PGOGenerateArg) { D.Diag(diag::err_drv_argument_not_allowed_with) << CSPGOGenerateArg->getSpelling() << PGOGenerateArg->getSpelling(); + PGOGenerateArg = nullptr; + } if (ProfileGenerateArg) { if (ProfileGenerateArg->getOption().matches( diff --git a/clang/test/Driver/fcs-profile-generate.c b/clang/test/Driver/fcs-profile-generate.c new file mode 100644 index 00000000000000..6be7f758c3e6d4 --- /dev/null +++ b/clang/test/Driver/fcs-profile-generate.c @@ -0,0 +1,15 @@ +// RUN: %clang -### -c -fprofile-use=a.profdata -fcs-profile-generate %s 2>&1 | FileCheck %s +// CHECK: "-fprofile-instrument=csllvm" +// CHECK-NOT: "-fprofile-instrument-path= +// CHECK-SAME: "-fprofile-instrument-use-path=a.profdata" + +// RUN: %clang -### -c -fprofile-use=a.profdata -fcs-profile-generate=dir %s 2>&1 | FileCheck %s --check-prefix=CHECK1 +// CHECK1: "-fprofile-instrument=csllvm"{{.*}} 
"-fprofile-instrument-path=dir{{/|\\\\}}default_%m.profraw" "-fprofile-instrument-use-path=a.profdata" + +/// Degradation case. This usage does not make much sense. +// RUN: %clang -### -c -fcs-profile-generate %s 2>&1 | FileCheck %s --check-prefix=NOUSE +// NOUSE: "-fprofile-instrument=csllvm" +// NOUSE-NOT: "-fprofile-instrument-path= + +// RUN: %clang -### -c -fprofile-generate -fcs-profile-generate %s 2>&1 | FileCheck %s --check-prefix=CONFLICT +// CONFLICT: error: invalid argument '-fcs-profile-generate' not allowed with '-fprofile-generate' From 55f2eeebc96e7522e49e19074cbfbe4e7f074b5b Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 12 Jan 2021 14:06:30 -0800 Subject: [PATCH 84/86] [NFC] Disallow unused prefixes in MC/AMDGPU 1 out of 2 patches. Differential Revision: https://reviews.llvm.org/D94553 --- llvm/test/MC/AMDGPU/add-sub-no-carry.s | 6 +++--- llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s | 6 +++--- llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s | 6 +++--- llvm/test/MC/AMDGPU/ds-gfx9.s | 4 ++-- llvm/test/MC/AMDGPU/ds.s | 12 ++++++------ llvm/test/MC/AMDGPU/flat-gfx10.s | 4 ++-- llvm/test/MC/AMDGPU/flat-global.s | 10 +++++----- llvm/test/MC/AMDGPU/flat-scratch-instructions.s | 10 +++++----- llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s | 4 ++-- llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s | 4 ++-- llvm/test/MC/AMDGPU/gfx10_asm_ds.s | 4 ++-- llvm/test/MC/AMDGPU/gfx10_asm_flat.s | 4 ++-- llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s | 4 ++-- llvm/test/MC/AMDGPU/gfx10_asm_smem.s | 4 ++-- llvm/test/MC/AMDGPU/gfx10_asm_sop.s | 4 ++-- llvm/test/MC/AMDGPU/gfx10_asm_vop1.s | 8 ++++---- llvm/test/MC/AMDGPU/gfx10_asm_vop2.s | 4 ++-- llvm/test/MC/AMDGPU/gfx10_asm_vopc.s | 8 ++++---- llvm/test/MC/AMDGPU/gfx10_asm_vopc_e64.s | 8 ++++---- llvm/test/MC/AMDGPU/gfx10_asm_vopc_sdwa.s | 8 ++++---- llvm/test/MC/AMDGPU/gfx10_asm_vopcx.s | 4 ++-- llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s | 6 +++--- llvm/test/MC/AMDGPU/hsa-metadata-kernel-args.s | 6 +++--- 
llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s | 6 +++--- llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs.s | 6 +++--- .../MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s | 6 +++--- llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s | 6 +++--- .../test/MC/AMDGPU/hsa-metadata-kernel-debug-props.s | 6 +++--- llvm/test/MC/AMDGPU/hsa-wave-size.s | 6 +++--- llvm/test/MC/AMDGPU/regression/bug28165.s | 8 ++++---- llvm/test/MC/AMDGPU/regression/bug28168.s | 4 ++-- llvm/test/MC/AMDGPU/regression/bug28413.s | 8 ++++---- llvm/test/MC/AMDGPU/regression/bug28538.s | 6 +++--- 33 files changed, 100 insertions(+), 100 deletions(-) diff --git a/llvm/test/MC/AMDGPU/add-sub-no-carry.s b/llvm/test/MC/AMDGPU/add-sub-no-carry.s index 2e3ac9d24376ab..1768b73b60af84 100644 --- a/llvm/test/MC/AMDGPU/add-sub-no-carry.s +++ b/llvm/test/MC/AMDGPU/add-sub-no-carry.s @@ -1,7 +1,7 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GFX9 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-VI,ERR-SICIVI --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICI,ERR-SICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=ERR-VI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefix=ERR-SICI --implicit-check-not=error: %s // FIXME: pre-gfx9 errors should be more useful diff --git a/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s b/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s index ab51b14e54543f..a9b5cfdfad6495 100644 --- a/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s +++ b/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s @@ -1,7 +1,7 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=PACKED %s -// RUN: llvm-mc 
-arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=PACKED %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck -check-prefix=PACKED %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=PACKED %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji 2>&1 %s | FileCheck -check-prefix=UNPACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji 2>&1 %s | FileCheck -check-prefix=UNPACKED-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s b/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s index 78ca007171a575..045da853746d13 100644 --- a/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s +++ b/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s +// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=UNPACKED %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 2>&1 %s | FileCheck -check-prefix=PACKED-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=PACKED-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/ds-gfx9.s b/llvm/test/MC/AMDGPU/ds-gfx9.s index 2ed2f953b0ca8f..7389df45fa96ea 100644 --- a/llvm/test/MC/AMDGPU/ds-gfx9.s +++ b/llvm/test/MC/AMDGPU/ds-gfx9.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | 
FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR --implicit-check-not=error: %s ds_read_u8_d16 v8, v2 // GFX9: ds_read_u8_d16 v8, v2 ; encoding: [0x00,0x00,0xac,0xd8,0x02,0x00,0x00,0x08] diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s index a618e9027f406a..8fc0e404f53749 100644 --- a/llvm/test/MC/AMDGPU/ds.s +++ b/llvm/test/MC/AMDGPU/ds.s @@ -1,11 +1,11 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SI --check-prefix=SICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SI --check-prefix=SICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=CI,SICI // RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 
2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Checks for 16-bit Offsets diff --git a/llvm/test/MC/AMDGPU/flat-gfx10.s b/llvm/test/MC/AMDGPU/flat-gfx10.s index 97ab93d5a98105..bb1aed9cbdb46b 100644 --- a/llvm/test/MC/AMDGPU/flat-gfx10.s +++ b/llvm/test/MC/AMDGPU/flat-gfx10.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s flat_load_dword v1, v[3:4] // GFX10: encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x7d,0x01] diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s index 10f152cc2f8595..2afb283054e6d0 100644 --- a/llvm/test/MC/AMDGPU/flat-global.s +++ b/llvm/test/MC/AMDGPU/flat-global.s @@ -1,9 +1,9 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefixes=GFX9,GCN %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck 
-check-prefix=VI-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s global_load_ubyte v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x20,0xdc,0x03,0x00,0x7d,0x01] diff --git a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s index a967b883079a9d..ad35310df53554 100644 --- a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s +++ b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s @@ -1,9 +1,9 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn 
-mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s scratch_load_ubyte v1, v2, off // GFX10: encoding: [0x00,0x40,0x20,0xdc,0x02,0x00,0x7d,0x01] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s index ccc363736432fb..b6e4fbc593e71e 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX10: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s index 8858ffd0936230..22ffc13200b975 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding 
%s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] // GFX10: encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_ds.s b/llvm/test/MC/AMDGPU/gfx10_asm_ds.s index fd56d5c527ebcc..14670eb6a8ad60 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_ds.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_ds.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s //===----------------------------------------------------------------------===// // ENC_DS. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_flat.s b/llvm/test/MC/AMDGPU/gfx10_asm_flat.s index 7f3d9a04b9aa33..10becc0b8442e3 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_flat.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_flat.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s //===----------------------------------------------------------------------===// // ENC_FLAT. diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s b/llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s index 8b6d85c55f28c3..73951bd6c298e7 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s //===----------------------------------------------------------------------===// // ENC_MUBUF. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s index edf70e7f4c47d2..23408afcd86ae3 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s //===----------------------------------------------------------------------===// // ENC_SMEM. diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_sop.s b/llvm/test/MC/AMDGPU/gfx10_asm_sop.s index 5401aa49e3b4dd..8bd19beb38ee4c 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_sop.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_sop.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s //===----------------------------------------------------------------------===// // ENC_SOP1. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s index 205e7c1471ca48..469eb31637f083 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_VOP1. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s index 40cb3aadc25031..a812f0ddf01a12 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_VOP2. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vopc.s b/llvm/test/MC/AMDGPU/gfx10_asm_vopc.s index ef13457e79e452..f850741e379f9d 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vopc.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vopc.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_VOPC - v_cmp_* opcodes. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vopc_e64.s b/llvm/test/MC/AMDGPU/gfx10_asm_vopc_e64.s index c2940be07c8246..3a036f32bf5e1f 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vopc_e64.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vopc_e64.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_VOPC, VOP3 variant. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vopc_sdwa.s b/llvm/test/MC/AMDGPU/gfx10_asm_vopc_sdwa.s index 75f130968d4476..3cb859da394659 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vopc_sdwa.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vopc_sdwa.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_VOPC, SDWA variant. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx10_asm_vopcx.s index 54baa79d08ff59..9f9a4853fc6452 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vopcx.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s //===----------------------------------------------------------------------===// // ENC_VOPC - v_cmpx_* opcodes. diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s index 6b966ba3b4d7c8..7da24a9d42ad4d 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK %s ; CHECK: .amdgpu_metadata ; CHECK: amdhsa.kernels: diff --git 
a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args.s index 86f92f96f8d9c7..a5dbf0a0075cc7 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s // The legacy ValueType field should be parsed without error, but not // re-emitted. 
diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s index dadd9752036dd7..602b4566d7f253 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK %s // CHECK: .amdgpu_metadata // CHECK: amdhsa.kernels: diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs.s index b490058895debe..6c9ed6b1fcf207 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc 
-triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s // CHECK: .amd_amdgpu_hsa_metadata // CHECK: Version: [ 1, 0 ] diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s index d226e753f709d8..cad60ea0d6d4ab 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK %s // CHECK: .amdgpu_metadata // CHECK: amdhsa.kernels: diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s index a5e86483055472..a99637e7be0d60 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck 
--check-prefix=CHECK --check-prefix=GFX800 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s // CHECK: .amd_amdgpu_hsa_metadata // CHECK: Version: [ 1, 0 ] diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-debug-props.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-debug-props.s index a818b890588194..c5d8da79f3e23f 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-debug-props.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-debug-props.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s // CHECK: .amd_amdgpu_hsa_metadata // CHECK: Version: [ 1, 0 ] diff --git 
a/llvm/test/MC/AMDGPU/hsa-wave-size.s b/llvm/test/MC/AMDGPU/hsa-wave-size.s index 5927764f68c341..5825c8057b57c8 100644 --- a/llvm/test/MC/AMDGPU/hsa-wave-size.s +++ b/llvm/test/MC/AMDGPU/hsa-wave-size.s @@ -2,9 +2,9 @@ // RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 %s | FileCheck --check-prefixes=GCN,GFX10-W32 %s // RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 %s | FileCheck --check-prefixes=GCN,GFX10-W64 %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX7-ERR %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W32-ERR %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W64-ERR %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck --check-prefix=GFX7-ERR %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX10-W32-ERR %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX10-W64-ERR %s // GCN: test0: // GFX7: enable_wavefront_size32 = 0 diff --git a/llvm/test/MC/AMDGPU/regression/bug28165.s b/llvm/test/MC/AMDGPU/regression/bug28165.s index 2e646fbec9f0bc..375eb338c0a6ae 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28165.s +++ b/llvm/test/MC/AMDGPU/regression/bug28165.s @@ -1,7 +1,7 @@ -// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s 
--check-prefix=GCN --check-prefix=SI --check-prefix=SICI -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI -// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI -// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI // SICI: v_cmp_eq_f64_e32 vcc, 0.5, v[254:255] ; encoding: [0xf0,0xfc,0x45,0x7c] // VI: v_cmp_eq_f64_e32 vcc, 0.5, v[254:255] ; encoding: [0xf0,0xfc,0xc5,0x7c] diff --git a/llvm/test/MC/AMDGPU/regression/bug28168.s b/llvm/test/MC/AMDGPU/regression/bug28168.s index 7d1383c4c7ef63..e65414856e722a 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28168.s +++ b/llvm/test/MC/AMDGPU/regression/bug28168.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CI -// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI +// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CI +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI v_mqsad_pk_u16_u8 v[2:3], s[0:1], 1, v[254:255] // CI: [0x02,0x00,0xe6,0xd2,0x00,0x02,0xf9,0x07] diff --git a/llvm/test/MC/AMDGPU/regression/bug28413.s b/llvm/test/MC/AMDGPU/regression/bug28413.s index 7e6d9d3a275658..f301764b4a6814 100644 --- 
a/llvm/test/MC/AMDGPU/regression/bug28413.s +++ b/llvm/test/MC/AMDGPU/regression/bug28413.s @@ -1,7 +1,7 @@ -// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI -// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI -// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI v_cmp_eq_i32 vcc, 0.5, v0 // SICI: v_cmp_eq_i32_e32 vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x04,0x7d] diff --git a/llvm/test/MC/AMDGPU/regression/bug28538.s b/llvm/test/MC/AMDGPU/regression/bug28538.s index f9cdb157bbb114..64fa1585d11470 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28538.s +++ b/llvm/test/MC/AMDGPU/regression/bug28538.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOCIVI --check-prefix=NOVI --implicit-check-not=error: -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI 
--implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // NOSICI: error: not a valid operand. From a7130d85e4b9e47b18a89eac3d47fd8c19d449c1 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Tue, 12 Jan 2021 22:43:48 +0000 Subject: [PATCH 85/86] [ADT][NFC] Use empty base optimisation in BumpPtrAllocatorImpl Most uses of this class just use the default MallocAllocator. As this contains no fields, we can use the empty base optimisation for BumpPtrAllocatorImpl and save 8 bytes of padding for most use cases. This prevents using a class that is marked as `final` as the `AllocatorT` template argument. If one must use an allocator that has been marked as `final`, the simplest way around this is a proxy class. The class should have all the methods that `AllocatorBase` expects and should forward the calls to your own allocator instance. 
Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D94439 --- llvm/include/llvm/Support/Allocator.h | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index 40c967ccc48578..245432debce667 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -66,7 +66,8 @@ template class BumpPtrAllocatorImpl : public AllocatorBase> { + SizeThreshold, GrowthDelay>>, + private AllocatorT { public: static_assert(SizeThreshold <= SlabSize, "The SizeThreshold must be at most the SlabSize to ensure " @@ -80,15 +81,15 @@ class BumpPtrAllocatorImpl template BumpPtrAllocatorImpl(T &&Allocator) - : Allocator(std::forward(Allocator)) {} + : AllocatorT(std::forward(Allocator)) {} // Manually implement a move constructor as we must clear the old allocator's // slabs as a matter of correctness. BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old) - : CurPtr(Old.CurPtr), End(Old.End), Slabs(std::move(Old.Slabs)), + : AllocatorT(static_cast(Old)), CurPtr(Old.CurPtr), + End(Old.End), Slabs(std::move(Old.Slabs)), CustomSizedSlabs(std::move(Old.CustomSizedSlabs)), - BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize), - Allocator(std::move(Old.Allocator)) { + BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) { Old.CurPtr = Old.End = nullptr; Old.BytesAllocated = 0; Old.Slabs.clear(); @@ -110,7 +111,7 @@ class BumpPtrAllocatorImpl RedZoneSize = RHS.RedZoneSize; Slabs = std::move(RHS.Slabs); CustomSizedSlabs = std::move(RHS.CustomSizedSlabs); - Allocator = std::move(RHS.Allocator); + AllocatorT::operator=(static_cast(RHS)); RHS.CurPtr = RHS.End = nullptr; RHS.BytesAllocated = 0; @@ -170,7 +171,8 @@ class BumpPtrAllocatorImpl // If Size is really big, allocate a separate slab for it. 
size_t PaddedSize = SizeToAllocate + Alignment.value() - 1; if (PaddedSize > SizeThreshold) { - void *NewSlab = Allocator.Allocate(PaddedSize, alignof(std::max_align_t)); + void *NewSlab = + AllocatorT::Allocate(PaddedSize, alignof(std::max_align_t)); // We own the new slab and don't want anyone reading anyting other than // pieces returned from this method. So poison the whole slab. __asan_poison_memory_region(NewSlab, PaddedSize); @@ -315,9 +317,6 @@ class BumpPtrAllocatorImpl /// a sanitizer. size_t RedZoneSize = 1; - /// The allocator instance we use to get slabs of memory. - AllocatorT Allocator; - static size_t computeSlabSize(unsigned SlabIdx) { // Scale the actual allocated slab size based on the number of slabs // allocated. Every GrowthDelay slabs allocated, we double @@ -333,7 +332,7 @@ class BumpPtrAllocatorImpl size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); void *NewSlab = - Allocator.Allocate(AllocatedSlabSize, alignof(std::max_align_t)); + AllocatorT::Allocate(AllocatedSlabSize, alignof(std::max_align_t)); // We own the new slab and don't want anyone reading anything other than // pieces returned from this method. So poison the whole slab. 
__asan_poison_memory_region(NewSlab, AllocatedSlabSize); @@ -349,7 +348,7 @@ class BumpPtrAllocatorImpl for (; I != E; ++I) { size_t AllocatedSlabSize = computeSlabSize(std::distance(Slabs.begin(), I)); - Allocator.Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t)); + AllocatorT::Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t)); } } @@ -358,7 +357,7 @@ class BumpPtrAllocatorImpl for (auto &PtrAndSize : CustomSizedSlabs) { void *Ptr = PtrAndSize.first; size_t Size = PtrAndSize.second; - Allocator.Deallocate(Ptr, Size, alignof(std::max_align_t)); + AllocatorT::Deallocate(Ptr, Size, alignof(std::max_align_t)); } } From 1730b0f66adaea6ed65d441dc2032013dd3c3664 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 12 Jan 2021 14:37:28 -0800 Subject: [PATCH 86/86] [RISCV] Remove '.mask' from vcompress intrinsic name. NFC It has a mask argument, but isn't a masked instruction. It doesn't use the mask policy of or the v0.t syntax. --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 2 +- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 6 +- llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll | 232 +++++++------- llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll | 296 +++++++++--------- 4 files changed, 268 insertions(+), 268 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 8171be8a1ca852..e45be2b7279602 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -740,7 +740,7 @@ let TargetPrefix = "riscv" in { defm vrgather : RISCVBinaryAAX; - def "int_riscv_vcompress_mask" : RISCVBinaryAAAMask; + def "int_riscv_vcompress" : RISCVBinaryAAAMask; defm vaaddu : RISCVSaturatingBinaryAAX; defm vaadd : RISCVSaturatingBinaryAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 3604a25b0d6ada..a715676183e2de 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ 
b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -947,7 +947,7 @@ multiclass VPseudoUnaryV_M { multiclass VPseudoUnaryV_V_AnyMask { foreach m = MxList.m in { let VLMul = m.value in - def _VM # "_" # m.MX # "_MASK" : VPseudoUnaryAnyMask; + def _VM # "_" # m.MX : VPseudoUnaryAnyMask; } } @@ -1404,12 +1404,12 @@ class VPatUnaryAnyMask : - Pat<(result_type (!cast(intrinsic#"_mask") + Pat<(result_type (!cast(intrinsic) (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (mask_type VR:$rs2), (XLenVT GPR:$vl))), - (!cast(inst#"_"#kind#"_"#vlmul.MX#"_MASK") + (!cast(inst#"_"#kind#"_"#vlmul.MX) (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (mask_type VR:$rs2), diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll index b8d42eeb9e6cd9..24b6d73d64c3ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ ; RUN: --riscv-no-aliases < %s | FileCheck %s -declare @llvm.riscv.vcompress.mask.nxv1i8( +declare @llvm.riscv.vcompress.nxv1i8( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8: +define @intrinsic_vcompress_vm_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1i8( + %a = call @llvm.riscv.vcompress.nxv1i8( %0, %1, %2, @@ -23,20 +23,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2i8( +declare @llvm.riscv.vcompress.nxv2i8( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8( 
%0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8: +define @intrinsic_vcompress_vm_nxv2i8_nxv2i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2i8( + %a = call @llvm.riscv.vcompress.nxv2i8( %0, %1, %2, @@ -45,20 +45,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4i8( +declare @llvm.riscv.vcompress.nxv4i8( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8: +define @intrinsic_vcompress_vm_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4i8( + %a = call @llvm.riscv.vcompress.nxv4i8( %0, %1, %2, @@ -67,20 +67,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8i8( +declare @llvm.riscv.vcompress.nxv8i8( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8: +define @intrinsic_vcompress_vm_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8i8( + %a = call @llvm.riscv.vcompress.nxv8i8( %0, %1, %2, @@ -89,20 +89,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16i8( +declare @llvm.riscv.vcompress.nxv16i8( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { -; 
CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8: +define @intrinsic_vcompress_vm_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv16i8( + %a = call @llvm.riscv.vcompress.nxv16i8( %0, %1, %2, @@ -111,20 +111,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv32i8( +declare @llvm.riscv.vcompress.nxv32i8( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8: +define @intrinsic_vcompress_vm_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv32i8( + %a = call @llvm.riscv.vcompress.nxv32i8( %0, %1, %2, @@ -133,14 +133,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv64i8( +declare @llvm.riscv.vcompress.nxv64i8( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8: +define @intrinsic_vcompress_vm_nxv64i8_nxv64i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu ; CHECK-NEXT: vle8.v v8, (a0) @@ -148,7 +148,7 @@ define @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8( @llvm.riscv.vcompress.mask.nxv64i8( + %a = call @llvm.riscv.vcompress.nxv64i8( %0, %1, %2, @@ -157,20 +157,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1i16( +declare @llvm.riscv.vcompress.nxv1i16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16( %0, %1, %2, i32 
%3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16: +define @intrinsic_vcompress_vm_nxv1i16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1i16( + %a = call @llvm.riscv.vcompress.nxv1i16( %0, %1, %2, @@ -179,20 +179,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2i16( +declare @llvm.riscv.vcompress.nxv2i16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16: +define @intrinsic_vcompress_vm_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2i16( + %a = call @llvm.riscv.vcompress.nxv2i16( %0, %1, %2, @@ -201,20 +201,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4i16( +declare @llvm.riscv.vcompress.nxv4i16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16: +define @intrinsic_vcompress_vm_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4i16( + %a = call @llvm.riscv.vcompress.nxv4i16( %0, %1, %2, @@ -223,20 +223,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8i16( +declare @llvm.riscv.vcompress.nxv8i16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16( %0, %1, 
%2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16: +define @intrinsic_vcompress_vm_nxv8i16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8i16( + %a = call @llvm.riscv.vcompress.nxv8i16( %0, %1, %2, @@ -245,20 +245,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16i16( +declare @llvm.riscv.vcompress.nxv16i16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16: +define @intrinsic_vcompress_vm_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv16i16( + %a = call @llvm.riscv.vcompress.nxv16i16( %0, %1, %2, @@ -267,14 +267,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv32i16( +declare @llvm.riscv.vcompress.nxv32i16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16: +define @intrinsic_vcompress_vm_nxv32i16_nxv32i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v8, (a0) @@ -282,7 +282,7 @@ define @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16( @llvm.riscv.vcompress.mask.nxv32i16( + %a = call @llvm.riscv.vcompress.nxv32i16( %0, %1, %2, @@ -291,20 +291,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1i32( +declare @llvm.riscv.vcompress.nxv1i32( , , , i32); -define 
@intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32: +define @intrinsic_vcompress_vm_nxv1i32_nxv1i32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1i32( + %a = call @llvm.riscv.vcompress.nxv1i32( %0, %1, %2, @@ -313,20 +313,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2i32( +declare @llvm.riscv.vcompress.nxv2i32( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32: +define @intrinsic_vcompress_vm_nxv2i32_nxv2i32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2i32( + %a = call @llvm.riscv.vcompress.nxv2i32( %0, %1, %2, @@ -335,20 +335,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4i32( +declare @llvm.riscv.vcompress.nxv4i32( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32: +define @intrinsic_vcompress_vm_nxv4i32_nxv4i32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4i32( + %a = call @llvm.riscv.vcompress.nxv4i32( %0, %1, %2, @@ -357,20 +357,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8i32( +declare @llvm.riscv.vcompress.nxv8i32( , , , i32); 
-define @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32: +define @intrinsic_vcompress_vm_nxv8i32_nxv8i32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8i32( + %a = call @llvm.riscv.vcompress.nxv8i32( %0, %1, %2, @@ -379,14 +379,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16i32( +declare @llvm.riscv.vcompress.nxv16i32( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32: +define @intrinsic_vcompress_vm_nxv16i32_nxv16i32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v8, (a0) @@ -394,7 +394,7 @@ define @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32( @llvm.riscv.vcompress.mask.nxv16i32( + %a = call @llvm.riscv.vcompress.nxv16i32( %0, %1, %2, @@ -403,20 +403,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1f16( +declare @llvm.riscv.vcompress.nxv1f16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16: +define @intrinsic_vcompress_vm_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1f16( + %a = call @llvm.riscv.vcompress.nxv1f16( %0, %1, %2, @@ -425,20 +425,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2f16( +declare 
@llvm.riscv.vcompress.nxv2f16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16: +define @intrinsic_vcompress_vm_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2f16( + %a = call @llvm.riscv.vcompress.nxv2f16( %0, %1, %2, @@ -447,20 +447,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4f16( +declare @llvm.riscv.vcompress.nxv4f16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16: +define @intrinsic_vcompress_vm_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4f16( + %a = call @llvm.riscv.vcompress.nxv4f16( %0, %1, %2, @@ -469,20 +469,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8f16( +declare @llvm.riscv.vcompress.nxv8f16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16: +define @intrinsic_vcompress_vm_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8f16( + %a = call @llvm.riscv.vcompress.nxv8f16( %0, %1, %2, @@ -491,20 +491,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16f16( 
+declare @llvm.riscv.vcompress.nxv16f16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16: +define @intrinsic_vcompress_vm_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv16f16( + %a = call @llvm.riscv.vcompress.nxv16f16( %0, %1, %2, @@ -513,14 +513,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv32f16( +declare @llvm.riscv.vcompress.nxv32f16( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16: +define @intrinsic_vcompress_vm_nxv32f16_nxv32f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v8, (a0) @@ -528,7 +528,7 @@ define @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16( @llvm.riscv.vcompress.mask.nxv32f16( + %a = call @llvm.riscv.vcompress.nxv32f16( %0, %1, %2, @@ -537,20 +537,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1f32( +declare @llvm.riscv.vcompress.nxv1f32( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32: +define @intrinsic_vcompress_vm_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1f32( + %a = call @llvm.riscv.vcompress.nxv1f32( %0, %1, %2, @@ -559,20 +559,20 @@ entry: ret %a 
} -declare @llvm.riscv.vcompress.mask.nxv2f32( +declare @llvm.riscv.vcompress.nxv2f32( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32: +define @intrinsic_vcompress_vm_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2f32( + %a = call @llvm.riscv.vcompress.nxv2f32( %0, %1, %2, @@ -581,20 +581,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4f32( +declare @llvm.riscv.vcompress.nxv4f32( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32: +define @intrinsic_vcompress_vm_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4f32( + %a = call @llvm.riscv.vcompress.nxv4f32( %0, %1, %2, @@ -603,20 +603,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8f32( +declare @llvm.riscv.vcompress.nxv8f32( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32: +define @intrinsic_vcompress_vm_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8f32( + %a = call @llvm.riscv.vcompress.nxv8f32( %0, %1, %2, @@ -625,14 +625,14 @@ entry: 
ret %a } -declare @llvm.riscv.vcompress.mask.nxv16f32( +declare @llvm.riscv.vcompress.nxv16f32( , , , i32); -define @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32: +define @intrinsic_vcompress_vm_nxv16f32_nxv16f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v8, (a0) @@ -640,7 +640,7 @@ define @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32( @llvm.riscv.vcompress.mask.nxv16f32( + %a = call @llvm.riscv.vcompress.nxv16f32( %0, %1, %2, diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll index a2e9df6e3fa2bb..dd1b48983344e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ ; RUN: --riscv-no-aliases < %s | FileCheck %s -declare @llvm.riscv.vcompress.mask.nxv1i8( +declare @llvm.riscv.vcompress.nxv1i8( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8: +define @intrinsic_vcompress_vm_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1i8( + %a = call @llvm.riscv.vcompress.nxv1i8( %0, %1, %2, @@ -23,20 +23,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2i8( +declare @llvm.riscv.vcompress.nxv2i8( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8( %0, %1, %2, i64 %3) nounwind { -; 
CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8: +define @intrinsic_vcompress_vm_nxv2i8_nxv2i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2i8( + %a = call @llvm.riscv.vcompress.nxv2i8( %0, %1, %2, @@ -45,20 +45,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4i8( +declare @llvm.riscv.vcompress.nxv4i8( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8: +define @intrinsic_vcompress_vm_nxv4i8_nxv4i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4i8( + %a = call @llvm.riscv.vcompress.nxv4i8( %0, %1, %2, @@ -67,20 +67,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8i8( +declare @llvm.riscv.vcompress.nxv8i8( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8: +define @intrinsic_vcompress_vm_nxv8i8_nxv8i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8i8( + %a = call @llvm.riscv.vcompress.nxv8i8( %0, %1, %2, @@ -89,20 +89,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16i8( +declare @llvm.riscv.vcompress.nxv16i8( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8: +define @intrinsic_vcompress_vm_nxv16i8_nxv16i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv16i8( + %a = call @llvm.riscv.vcompress.nxv16i8( %0, %1, %2, @@ -111,20 +111,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv32i8( +declare @llvm.riscv.vcompress.nxv32i8( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8: +define @intrinsic_vcompress_vm_nxv32i8_nxv32i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv32i8( + %a = call @llvm.riscv.vcompress.nxv32i8( %0, %1, %2, @@ -133,14 +133,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv64i8( +declare @llvm.riscv.vcompress.nxv64i8( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8: +define @intrinsic_vcompress_vm_nxv64i8_nxv64i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu ; CHECK-NEXT: vle8.v v8, (a0) @@ -148,7 +148,7 @@ define @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8( @llvm.riscv.vcompress.mask.nxv64i8( + %a = call @llvm.riscv.vcompress.nxv64i8( %0, %1, %2, @@ -157,20 +157,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1i16( +declare @llvm.riscv.vcompress.nxv1i16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16( %0, %1, %2, i64 %3) nounwind { 
-; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16: +define @intrinsic_vcompress_vm_nxv1i16_nxv1i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1i16( + %a = call @llvm.riscv.vcompress.nxv1i16( %0, %1, %2, @@ -179,20 +179,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2i16( +declare @llvm.riscv.vcompress.nxv2i16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16: +define @intrinsic_vcompress_vm_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2i16( + %a = call @llvm.riscv.vcompress.nxv2i16( %0, %1, %2, @@ -201,20 +201,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4i16( +declare @llvm.riscv.vcompress.nxv4i16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16: +define @intrinsic_vcompress_vm_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4i16( + %a = call @llvm.riscv.vcompress.nxv4i16( %0, %1, %2, @@ -223,20 +223,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8i16( +declare @llvm.riscv.vcompress.nxv8i16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) 
nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16: +define @intrinsic_vcompress_vm_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8i16( + %a = call @llvm.riscv.vcompress.nxv8i16( %0, %1, %2, @@ -245,20 +245,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16i16( +declare @llvm.riscv.vcompress.nxv16i16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16: +define @intrinsic_vcompress_vm_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv16i16( + %a = call @llvm.riscv.vcompress.nxv16i16( %0, %1, %2, @@ -267,14 +267,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv32i16( +declare @llvm.riscv.vcompress.nxv32i16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16: +define @intrinsic_vcompress_vm_nxv32i16_nxv32i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v8, (a0) @@ -282,7 +282,7 @@ define @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16( @llvm.riscv.vcompress.mask.nxv32i16( + %a = call @llvm.riscv.vcompress.nxv32i16( %0, %1, %2, @@ -291,20 +291,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1i32( +declare @llvm.riscv.vcompress.nxv1i32( , , , i64); -define 
@intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32: +define @intrinsic_vcompress_vm_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1i32( + %a = call @llvm.riscv.vcompress.nxv1i32( %0, %1, %2, @@ -313,20 +313,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2i32( +declare @llvm.riscv.vcompress.nxv2i32( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32: +define @intrinsic_vcompress_vm_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2i32( + %a = call @llvm.riscv.vcompress.nxv2i32( %0, %1, %2, @@ -335,20 +335,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4i32( +declare @llvm.riscv.vcompress.nxv4i32( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32: +define @intrinsic_vcompress_vm_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4i32( + %a = call @llvm.riscv.vcompress.nxv4i32( %0, %1, %2, @@ -357,20 +357,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8i32( +declare @llvm.riscv.vcompress.nxv8i32( , , , i64); 
-define @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32: +define @intrinsic_vcompress_vm_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8i32( + %a = call @llvm.riscv.vcompress.nxv8i32( %0, %1, %2, @@ -379,14 +379,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16i32( +declare @llvm.riscv.vcompress.nxv16i32( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32: +define @intrinsic_vcompress_vm_nxv16i32_nxv16i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v8, (a0) @@ -394,7 +394,7 @@ define @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32( @llvm.riscv.vcompress.mask.nxv16i32( + %a = call @llvm.riscv.vcompress.nxv16i32( %0, %1, %2, @@ -403,20 +403,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1i64( +declare @llvm.riscv.vcompress.nxv1i64( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64: +define @intrinsic_vcompress_vm_nxv1i64_nxv1i64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1i64( + %a = call @llvm.riscv.vcompress.nxv1i64( %0, %1, %2, @@ -425,20 +425,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2i64( +declare 
@llvm.riscv.vcompress.nxv2i64( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64: +define @intrinsic_vcompress_vm_nxv2i64_nxv2i64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2i64( + %a = call @llvm.riscv.vcompress.nxv2i64( %0, %1, %2, @@ -447,20 +447,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4i64( +declare @llvm.riscv.vcompress.nxv4i64( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64: +define @intrinsic_vcompress_vm_nxv4i64_nxv4i64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4i64( + %a = call @llvm.riscv.vcompress.nxv4i64( %0, %1, %2, @@ -469,14 +469,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8i64( +declare @llvm.riscv.vcompress.nxv8i64( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64: +define @intrinsic_vcompress_vm_nxv8i64_nxv8i64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v8, (a0) @@ -484,7 +484,7 @@ define @intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64( @llvm.riscv.vcompress.mask.nxv8i64( + %a = call @llvm.riscv.vcompress.nxv8i64( %0, %1, %2, @@ -493,20 +493,20 @@ entry: ret %a } -declare 
@llvm.riscv.vcompress.mask.nxv1f16( +declare @llvm.riscv.vcompress.nxv1f16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16: +define @intrinsic_vcompress_vm_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1f16( + %a = call @llvm.riscv.vcompress.nxv1f16( %0, %1, %2, @@ -515,20 +515,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2f16( +declare @llvm.riscv.vcompress.nxv2f16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16: +define @intrinsic_vcompress_vm_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2f16( + %a = call @llvm.riscv.vcompress.nxv2f16( %0, %1, %2, @@ -537,20 +537,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4f16( +declare @llvm.riscv.vcompress.nxv4f16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16: +define @intrinsic_vcompress_vm_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4f16( + %a = call @llvm.riscv.vcompress.nxv4f16( %0, %1, %2, @@ -559,20 +559,20 @@ entry: ret %a } 
-declare @llvm.riscv.vcompress.mask.nxv8f16( +declare @llvm.riscv.vcompress.nxv8f16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16: +define @intrinsic_vcompress_vm_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8f16( + %a = call @llvm.riscv.vcompress.nxv8f16( %0, %1, %2, @@ -581,20 +581,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16f16( +declare @llvm.riscv.vcompress.nxv16f16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16: +define @intrinsic_vcompress_vm_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv16f16( + %a = call @llvm.riscv.vcompress.nxv16f16( %0, %1, %2, @@ -603,14 +603,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv32f16( +declare @llvm.riscv.vcompress.nxv32f16( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16: +define @intrinsic_vcompress_vm_nxv32f16_nxv32f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v8, (a0) @@ -618,7 +618,7 @@ define @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16( @llvm.riscv.vcompress.mask.nxv32f16( + %a = call @llvm.riscv.vcompress.nxv32f16( %0, 
%1, %2, @@ -627,20 +627,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1f32( +declare @llvm.riscv.vcompress.nxv1f32( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32: +define @intrinsic_vcompress_vm_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv1f32( + %a = call @llvm.riscv.vcompress.nxv1f32( %0, %1, %2, @@ -649,20 +649,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2f32( +declare @llvm.riscv.vcompress.nxv2f32( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32: +define @intrinsic_vcompress_vm_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2f32( + %a = call @llvm.riscv.vcompress.nxv2f32( %0, %1, %2, @@ -671,20 +671,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4f32( +declare @llvm.riscv.vcompress.nxv4f32( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32: +define @intrinsic_vcompress_vm_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4f32( + %a = call 
@llvm.riscv.vcompress.nxv4f32( %0, %1, %2, @@ -693,20 +693,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8f32( +declare @llvm.riscv.vcompress.nxv8f32( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32: +define @intrinsic_vcompress_vm_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv8f32( + %a = call @llvm.riscv.vcompress.nxv8f32( %0, %1, %2, @@ -715,14 +715,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv16f32( +declare @llvm.riscv.vcompress.nxv16f32( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32: +define @intrinsic_vcompress_vm_nxv16f32_nxv16f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v8, (a0) @@ -730,7 +730,7 @@ define @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32( @llvm.riscv.vcompress.mask.nxv16f32( + %a = call @llvm.riscv.vcompress.nxv16f32( %0, %1, %2, @@ -739,20 +739,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv1f64( +declare @llvm.riscv.vcompress.nxv1f64( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64: +define @intrinsic_vcompress_vm_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu ; CHECK-NEXT: vcompress.vm v16, v17, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call 
@llvm.riscv.vcompress.mask.nxv1f64( + %a = call @llvm.riscv.vcompress.nxv1f64( %0, %1, %2, @@ -761,20 +761,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv2f64( +declare @llvm.riscv.vcompress.nxv2f64( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64: +define @intrinsic_vcompress_vm_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu ; CHECK-NEXT: vcompress.vm v16, v18, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv2f64( + %a = call @llvm.riscv.vcompress.nxv2f64( %0, %1, %2, @@ -783,20 +783,20 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv4f64( +declare @llvm.riscv.vcompress.nxv4f64( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64: +define @intrinsic_vcompress_vm_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu ; CHECK-NEXT: vcompress.vm v16, v20, v0 ; CHECK-NEXT: jalr zero, 0(ra) entry: - %a = call @llvm.riscv.vcompress.mask.nxv4f64( + %a = call @llvm.riscv.vcompress.nxv4f64( %0, %1, %2, @@ -805,14 +805,14 @@ entry: ret %a } -declare @llvm.riscv.vcompress.mask.nxv8f64( +declare @llvm.riscv.vcompress.nxv8f64( , , , i64); -define @intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64: +define @intrinsic_vcompress_vm_nxv8f64_nxv8f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v8, (a0) @@ -820,7 +820,7 @@ define 
@intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64( @llvm.riscv.vcompress.mask.nxv8f64( + %a = call @llvm.riscv.vcompress.nxv8f64( %0, %1, %2,