diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 80fdedcf9c62599..4180dcc8a720d5b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -27919,6 +27919,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, return S; if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG)) return S; + if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL)) + return ABD; return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 8c9c9f7188d4dc3..d4c6a09405e0c49 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -377,7 +377,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x8, x9, lt +; CHECK-NEXT: csel x0, x9, x8, gt ; CHECK-NEXT: ret %cmp = icmp slt i64 %a, %b %ab = sub i64 %a, %b @@ -389,14 +389,13 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: abd_cmp_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: sbc x8, x1, x3 -; CHECK-NEXT: subs x9, x2, x0 -; CHECK-NEXT: sbc x10, x3, x1 -; CHECK-NEXT: subs x11, x0, x2 -; CHECK-NEXT: sbcs xzr, x1, x3 -; CHECK-NEXT: csel x0, x9, x11, lt -; CHECK-NEXT: csel x1, x10, x8, lt +; CHECK-NEXT: subs x8, x0, x2 +; CHECK-NEXT: sbc x9, x1, x3 +; CHECK-NEXT: subs x10, x2, x0 +; CHECK-NEXT: sbc x11, x3, x1 +; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: csel x0, x8, x10, lt +; CHECK-NEXT: csel x1, x9, x11, lt ; CHECK-NEXT: ret %cmp = icmp slt i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index 85b70ede2807bb4..45bb8749b25ed96 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -298,10 +298,9 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w9, w0, w1 -; CHECK-NEXT: sub w10, w1, w0 -; CHECK-NEXT: cmp w8, w1, sxtb -; CHECK-NEXT: csel w0, w9, w10, gt +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sgt i8 %a, %b %ab = sub i8 %a, %b @@ -314,10 +313,9 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w9, w0, w1 -; CHECK-NEXT: sub w10, w1, w0 -; CHECK-NEXT: cmp w8, w1, sxth -; CHECK-NEXT: csel w0, w9, w10, ge +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sge i16 %a, %b %ab = sub i16 %a, %b @@ -331,7 +329,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: sub w8, w1, w0 ; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w8, w9, lt +; CHECK-NEXT: csel w0, w9, w8, gt ; CHECK-NEXT: ret %cmp = icmp slt i32 %a, %b %ab = sub i32 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 1613cbce4b8c8a2..b148a29a72976cc 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -379,7 +379,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x8, x9, lo +; 
CHECK-NEXT: csel x0, x9, x8, hi ; CHECK-NEXT: ret %cmp = icmp ult i64 %a, %b %ab = sub i64 %a, %b @@ -391,14 +391,14 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: abd_cmp_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: sbc x8, x1, x3 -; CHECK-NEXT: subs x9, x2, x0 -; CHECK-NEXT: sbc x10, x3, x1 -; CHECK-NEXT: subs x11, x0, x2 -; CHECK-NEXT: sbcs xzr, x1, x3 -; CHECK-NEXT: csel x0, x9, x11, lo -; CHECK-NEXT: csel x1, x10, x8, lo +; CHECK-NEXT: subs x8, x0, x2 +; CHECK-NEXT: sbcs x9, x1, x3 +; CHECK-NEXT: cset w10, lo +; CHECK-NEXT: sbfx x10, x10, #0, #1 +; CHECK-NEXT: eor x8, x8, x10 +; CHECK-NEXT: eor x9, x9, x10 +; CHECK-NEXT: subs x0, x8, x10 +; CHECK-NEXT: sbc x1, x9, x10 ; CHECK-NEXT: ret %cmp = icmp ult i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 2baa4f0ca43a7a4..22d41dfb85a629a 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -301,10 +301,9 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w9, w0, w1 -; CHECK-NEXT: sub w10, w1, w0 -; CHECK-NEXT: cmp w8, w1, uxtb -; CHECK-NEXT: csel w0, w9, w10, hi +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ugt i8 %a, %b %ab = sub i8 %a, %b @@ -317,10 +316,9 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w9, w0, w1 -; CHECK-NEXT: sub w10, w1, w0 -; CHECK-NEXT: cmp w8, w1, uxth -; CHECK-NEXT: csel w0, w9, w10, hs +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp uge i16 %a, %b %ab = sub i16 %a, %b @@ -334,7 +332,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: sub w8, w1, w0 ; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w8, w9, lo +; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %cmp = icmp ult i32 %a, %b %ab = sub i32 %a, %b diff --git a/llvm/test/CodeGen/AMDGPU/sad.ll b/llvm/test/CodeGen/AMDGPU/sad.ll index e4309a291936378..c2132cf907fdb29 100644 --- a/llvm/test/CodeGen/AMDGPU/sad.ll +++ b/llvm/test/CodeGen/AMDGPU/sad.ll @@ -258,10 +258,9 @@ define amdgpu_kernel void @v_sad_u32_multi_use_select_pat2(ptr addrspace(1) %out ; GCN-NEXT: s_add_u32 s16, s16, s13 ; GCN-NEXT: s_addc_u32 s17, s17, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s3, s0, s1 -; GCN-NEXT: s_sub_i32 s6, s1, s0 -; GCN-NEXT: s_cmp_gt_u32 s0, s1 -; GCN-NEXT: s_cselect_b32 s0, s3, s6 +; GCN-NEXT: s_min_u32 s3, s0, s1 +; GCN-NEXT: s_max_u32 s0, s0, s1 +; GCN-NEXT: s_sub_i32 s0, s0, s3 ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: v_mov_b32_e32 v2, s0 ; GCN-NEXT: s_add_i32 s0, s0, s2 @@ -477,18 +476,14 @@ define amdgpu_kernel void @s_sad_u32_i8_pat2(ptr addrspace(1) %out, i8 zeroext % ; GCN-NEXT: s_load_dword s2, s[6:7], 0x2 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s4, s2, 8 ; GCN-NEXT: s_and_b32 s3, s2, 0xff -; GCN-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GCN-NEXT: s_lshr_b32 s6, s2, 16 -; GCN-NEXT: s_sub_i32 s7, s2, s4 -; GCN-NEXT: s_sub_i32 s2, s4, s2 -; GCN-NEXT: s_cmp_gt_u32 s3, s5 -; GCN-NEXT: s_cselect_b32 s2, s7, s2 -; GCN-NEXT: s_add_i32 s2, s2, s6 +; GCN-NEXT: s_bfe_u32 s4, s2, 0x80008 +; GCN-NEXT: 
s_lshr_b32 s2, s2, 16 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s2 +; GCN-NEXT: v_sad_u32 v2, s3, v0, v1 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_byte v[0:1], v2 ; GCN-NEXT: s_endpgm %icmp0 = icmp ugt i8 %a, %b diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll index 9bd28b91dd4c95e..058f105e8f73585 100644 --- a/llvm/test/CodeGen/RISCV/abds-neg.ll +++ b/llvm/test/CodeGen/RISCV/abds-neg.ll @@ -1791,64 +1791,61 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: abd_cmp_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: sltu a4, a2, a0 ; RV32I-NEXT: mv a5, a4 ; RV32I-NEXT: beq a1, a3, .LBB21_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a5, a1, a3 +; RV32I-NEXT: slt a5, a3, a1 ; RV32I-NEXT: .LBB21_2: ; RV32I-NEXT: bnez a5, .LBB21_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB21_4: -; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: sub a1, a3, a1 +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a0, a1, .LBB21_2 +; RV64I-NEXT: blt a1, a0, .LBB21_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB21_2: -; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: sltu a4, a0, a2 +; RV32ZBB-NEXT: sltu a4, a2, a0 ; RV32ZBB-NEXT: mv a5, a4 ; RV32ZBB-NEXT: beq a1, a3, .LBB21_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt a5, a1, a3 +; RV32ZBB-NEXT: slt a5, a3, a1 ; RV32ZBB-NEXT: .LBB21_2: ; RV32ZBB-NEXT: bnez a5, .LBB21_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sub a1, a1, a3 +; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sub a0, a0, a2 +; RV32ZBB-NEXT: sub a0, a2, a0 ; RV32ZBB-NEXT: ret ; RV32ZBB-NEXT: .LBB21_4: -; RV32ZBB-NEXT: sltu a4, a2, a0 -; RV32ZBB-NEXT: sub a1, a3, a1 +; RV32ZBB-NEXT: sltu a4, a0, a2 +; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sub a0, a2, a0 +; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: blt a0, a1, .LBB21_2 -; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: sub a0, a0, a1 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB21_2: -; RV64ZBB-NEXT: sub a0, a1, a0 +; RV64ZBB-NEXT: min a2, a0, a1 +; RV64ZBB-NEXT: max a0, a0, a1 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp slt i64 %a, %b %ab = sub i64 %a, %b @@ -1860,176 +1857,194 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw a7, 8(a1) -; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a6, 8(a1) +; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a7, a6 -; RV32I-NEXT: mv t4, t1 -; RV32I-NEXT: beq t0, a2, 
.LBB22_2 +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: sltu a2, a7, a6 +; RV32I-NEXT: mv t4, a2 +; RV32I-NEXT: beq t0, t1, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t0, a2 +; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sltu t2, a4, a3 +; RV32I-NEXT: sltu t2, a5, a3 +; RV32I-NEXT: sltu t5, a1, a4 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a1, a5, .LBB22_4 +; RV32I-NEXT: beq a4, a1, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t3, a1, a5 +; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t0, a2 -; RV32I-NEXT: xor t6, a7, a6 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: mv t6, t3 -; RV32I-NEXT: beqz t5, .LBB22_6 +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: xor t6, t0, t1 +; RV32I-NEXT: xor s0, a6, a7 +; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: beqz t6, .LBB22_6 ; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv t6, t4 +; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB22_6: -; RV32I-NEXT: sltu t4, a3, a4 -; RV32I-NEXT: mv t5, t4 -; RV32I-NEXT: beq a1, a5, .LBB22_8 +; RV32I-NEXT: mv t4, t2 +; RV32I-NEXT: beq a1, a4, .LBB22_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: sltu t5, a5, a1 +; RV32I-NEXT: mv t4, t5 ; RV32I-NEXT: .LBB22_8: -; RV32I-NEXT: bnez t6, .LBB22_10 +; RV32I-NEXT: sltu t5, a3, a5 +; RV32I-NEXT: mv t6, t5 +; RV32I-NEXT: beq a4, a1, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu t6, a4, a1 +; RV32I-NEXT: .LBB22_10: +; RV32I-NEXT: bnez t3, .LBB22_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sub t0, t1, t0 ; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a2, a2, t1 -; RV32I-NEXT: sltu a7, a6, t3 -; RV32I-NEXT: sub a1, a1, a5 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t4 ; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a6, a6, t3 +; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sub a1, a1, a4 ; RV32I-NEXT: sub a1, a1, t2 -; RV32I-NEXT: sub a3, a4, a3 -; RV32I-NEXT: j .LBB22_11 -; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sltu t1, a6, a7 -; RV32I-NEXT: sub a2, a2, t0 -; RV32I-NEXT: sub a2, a2, t1 +; RV32I-NEXT: sub a4, a6, t4 +; RV32I-NEXT: j .LBB22_13 +; RV32I-NEXT: .LBB22_12: +; RV32I-NEXT: sltu a2, a6, a7 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a2, t0, a2 ; RV32I-NEXT: sub a6, a6, a7 -; RV32I-NEXT: sltu a7, a6, t5 +; RV32I-NEXT: sltu a7, a6, t6 ; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a6, a6, t5 -; RV32I-NEXT: sub a5, a5, a1 -; RV32I-NEXT: sub a1, a5, t4 -; RV32I-NEXT: sub a3, a3, a4 -; RV32I-NEXT: .LBB22_11: -; RV32I-NEXT: sw a6, 8(a0) +; RV32I-NEXT: sub a3, a3, a5 +; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: sub a1, a4, t5 +; RV32I-NEXT: sub a4, a6, t6 +; RV32I-NEXT: .LBB22_13: +; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a3, 0(a0) ; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: ; RV64I: # %bb.0: -; RV64I-NEXT: sltu a4, a0, a2 +; RV64I-NEXT: sltu a4, a2, a0 ; RV64I-NEXT: mv a5, a4 ; RV64I-NEXT: beq a1, a3, .LBB22_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: slt a5, a1, a3 +; RV64I-NEXT: slt a5, a3, a1 ; RV64I-NEXT: .LBB22_2: ; RV64I-NEXT: bnez a5, .LBB22_4 ; RV64I-NEXT: # %bb.3: -; RV64I-NEXT: sub a1, a1, a3 +; RV64I-NEXT: sub a1, a3, a1 ; RV64I-NEXT: sub a1, a1, a4 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB22_4: -; RV64I-NEXT: sltu a4, a2, a0 -; RV64I-NEXT: sub a1, a3, a1 +; RV64I-NEXT: sltu a4, a0, a2 +; RV64I-NEXT: sub a1, 
a1, a3 ; RV64I-NEXT: sub a1, a1, a4 -; RV64I-NEXT: sub a0, a2, a0 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 0(a1) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 8(a1) -; RV32ZBB-NEXT: lw a2, 12(a2) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a4, 4(a1) +; RV32ZBB-NEXT: lw a6, 8(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a7, a6 -; RV32ZBB-NEXT: mv t4, t1 -; RV32ZBB-NEXT: beq t0, a2, .LBB22_2 +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: sltu a2, a7, a6 +; RV32ZBB-NEXT: mv t4, a2 +; RV32ZBB-NEXT: beq t0, t1, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t0, a2 +; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sltu t2, a4, a3 +; RV32ZBB-NEXT: sltu t2, a5, a3 +; RV32ZBB-NEXT: sltu t5, a1, a4 ; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_4 +; RV32ZBB-NEXT: beq a4, a1, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t3, a1, a5 +; RV32ZBB-NEXT: mv t3, t5 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t0, a2 -; RV32ZBB-NEXT: xor t6, a7, a6 -; RV32ZBB-NEXT: or t5, t6, t5 -; RV32ZBB-NEXT: mv t6, t3 -; RV32ZBB-NEXT: beqz t5, .LBB22_6 +; RV32ZBB-NEXT: addi sp, sp, -16 +; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZBB-NEXT: xor t6, t0, t1 +; RV32ZBB-NEXT: xor s0, a6, a7 +; RV32ZBB-NEXT: or t6, s0, t6 +; RV32ZBB-NEXT: beqz t6, .LBB22_6 ; RV32ZBB-NEXT: # %bb.5: -; RV32ZBB-NEXT: mv t6, t4 +; RV32ZBB-NEXT: mv t3, t4 ; RV32ZBB-NEXT: .LBB22_6: -; RV32ZBB-NEXT: sltu t4, a3, a4 -; RV32ZBB-NEXT: mv t5, t4 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_8 +; RV32ZBB-NEXT: mv t4, t2 +; RV32ZBB-NEXT: beq a1, a4, .LBB22_8 ; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: sltu t5, a5, a1 +; RV32ZBB-NEXT: mv t4, t5 ; RV32ZBB-NEXT: .LBB22_8: -; RV32ZBB-NEXT: bnez t6, .LBB22_10 +; RV32ZBB-NEXT: sltu t5, a3, a5 +; RV32ZBB-NEXT: mv t6, t5 +; RV32ZBB-NEXT: beq a4, a1, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu t6, a4, a1 +; RV32ZBB-NEXT: .LBB22_10: +; RV32ZBB-NEXT: bnez t3, .LBB22_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sub t0, t1, t0 ; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a2, a2, t1 -; RV32ZBB-NEXT: sltu a7, a6, t3 -; RV32ZBB-NEXT: sub a1, a1, a5 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t4 ; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a6, a6, t3 +; RV32ZBB-NEXT: sub a3, a5, a3 +; RV32ZBB-NEXT: sub a1, a1, a4 ; RV32ZBB-NEXT: sub a1, a1, t2 -; RV32ZBB-NEXT: sub a3, a4, a3 -; RV32ZBB-NEXT: j .LBB22_11 -; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sltu t1, a6, a7 -; RV32ZBB-NEXT: sub a2, a2, t0 -; RV32ZBB-NEXT: sub a2, a2, t1 +; RV32ZBB-NEXT: sub a4, a6, t4 +; RV32ZBB-NEXT: j .LBB22_13 +; RV32ZBB-NEXT: .LBB22_12: +; RV32ZBB-NEXT: sltu a2, a6, a7 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a2, t0, a2 ; RV32ZBB-NEXT: sub a6, a6, a7 -; RV32ZBB-NEXT: sltu a7, a6, t5 +; RV32ZBB-NEXT: sltu a7, a6, t6 ; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a6, a6, t5 -; RV32ZBB-NEXT: sub a5, a5, a1 -; RV32ZBB-NEXT: sub a1, a5, t4 -; RV32ZBB-NEXT: sub a3, a3, a4 -; RV32ZBB-NEXT: .LBB22_11: -; RV32ZBB-NEXT: sw a6, 8(a0) +; RV32ZBB-NEXT: sub a3, a3, a5 +; RV32ZBB-NEXT: sub a4, a4, a1 +; RV32ZBB-NEXT: sub a1, a4, t5 +; RV32ZBB-NEXT: sub a4, a6, t6 +; RV32ZBB-NEXT: .LBB22_13: +; RV32ZBB-NEXT: 
sw a4, 8(a0) ; RV32ZBB-NEXT: sw a1, 4(a0) ; RV32ZBB-NEXT: sw a3, 0(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) +; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32ZBB-NEXT: addi sp, sp, 16 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sltu a4, a0, a2 +; RV64ZBB-NEXT: sltu a4, a2, a0 ; RV64ZBB-NEXT: mv a5, a4 ; RV64ZBB-NEXT: beq a1, a3, .LBB22_2 ; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: slt a5, a1, a3 +; RV64ZBB-NEXT: slt a5, a3, a1 ; RV64ZBB-NEXT: .LBB22_2: ; RV64ZBB-NEXT: bnez a5, .LBB22_4 ; RV64ZBB-NEXT: # %bb.3: -; RV64ZBB-NEXT: sub a1, a1, a3 +; RV64ZBB-NEXT: sub a1, a3, a1 ; RV64ZBB-NEXT: sub a1, a1, a4 -; RV64ZBB-NEXT: sub a0, a0, a2 +; RV64ZBB-NEXT: sub a0, a2, a0 ; RV64ZBB-NEXT: ret ; RV64ZBB-NEXT: .LBB22_4: -; RV64ZBB-NEXT: sltu a4, a2, a0 -; RV64ZBB-NEXT: sub a1, a3, a1 +; RV64ZBB-NEXT: sltu a4, a0, a2 +; RV64ZBB-NEXT: sub a1, a1, a3 ; RV64ZBB-NEXT: sub a1, a1, a4 -; RV64ZBB-NEXT: sub a0, a2, a0 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp slt i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll index 3cebc1128ae8503..b867a55445c95b2 100644 --- a/llvm/test/CodeGen/RISCV/abds.ll +++ b/llvm/test/CodeGen/RISCV/abds.ll @@ -1325,42 +1325,35 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; RV32I-LABEL: abd_cmp_i8: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a0, 24 -; RV32I-NEXT: srai a2, a2, 24 -; RV32I-NEXT: slli a3, a1, 24 -; RV32I-NEXT: srai a3, a3, 24 -; RV32I-NEXT: blt a3, a2, .LBB18_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a1, a0 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB18_2: +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a2, a0, 56 -; RV64I-NEXT: srai a2, a2, 56 -; RV64I-NEXT: slli a3, a1, 56 -; RV64I-NEXT: srai a3, a3, 56 -; RV64I-NEXT: blt a3, a2, .LBB18_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a0, a1, a0 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB18_2: +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_cmp_i8: ; ZBB: # %bb.0: -; ZBB-NEXT: sext.b a2, a0 -; ZBB-NEXT: sext.b a3, a1 -; ZBB-NEXT: blt a3, a2, .LBB18_2 -; ZBB-NEXT: # %bb.1: -; ZBB-NEXT: sub a0, a1, a0 -; ZBB-NEXT: ret -; ZBB-NEXT: .LBB18_2: -; ZBB-NEXT: sub a0, a0, a1 +; ZBB-NEXT: sext.b a1, a1 +; ZBB-NEXT: sext.b a0, a0 +; ZBB-NEXT: min a2, a0, a1 +; ZBB-NEXT: max a0, a0, a1 +; ZBB-NEXT: sub a0, a0, a2 ; ZBB-NEXT: ret %cmp = icmp sgt i8 %a, %b %ab = sub i8 %a, %b @@ -1372,42 +1365,35 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; RV32I-LABEL: abd_cmp_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a1, 16 -; RV32I-NEXT: srai a2, a2, 16 -; RV32I-NEXT: slli a3, a0, 16 -; RV32I-NEXT: srai a3, a3, 16 -; RV32I-NEXT: bge a3, a2, .LBB19_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a1, a0 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB19_2: +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: sub a0, a0, a1 
+; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a2, a1, 48 -; RV64I-NEXT: srai a2, a2, 48 -; RV64I-NEXT: slli a3, a0, 48 -; RV64I-NEXT: srai a3, a3, 48 -; RV64I-NEXT: bge a3, a2, .LBB19_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a0, a1, a0 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB19_2: +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_cmp_i16: ; ZBB: # %bb.0: -; ZBB-NEXT: sext.h a2, a1 -; ZBB-NEXT: sext.h a3, a0 -; ZBB-NEXT: bge a3, a2, .LBB19_2 -; ZBB-NEXT: # %bb.1: -; ZBB-NEXT: sub a0, a1, a0 -; ZBB-NEXT: ret -; ZBB-NEXT: .LBB19_2: -; ZBB-NEXT: sub a0, a0, a1 +; ZBB-NEXT: sext.h a1, a1 +; ZBB-NEXT: sext.h a0, a0 +; ZBB-NEXT: min a2, a0, a1 +; ZBB-NEXT: max a0, a0, a1 +; ZBB-NEXT: sub a0, a0, a2 ; ZBB-NEXT: ret %cmp = icmp sge i16 %a, %b %ab = sub i16 %a, %b @@ -1419,46 +1405,38 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: abd_cmp_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: blt a0, a1, .LBB20_2 +; RV32I-NEXT: blt a1, a0, .LBB20_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB20_2: -; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a2, a1 -; RV64I-NEXT: sext.w a3, a0 -; RV64I-NEXT: blt a3, a2, .LBB20_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a0, a0, a1 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB20_2: -; RV64I-NEXT: subw a0, a1, a0 +; RV64I-NEXT: sext.w a1, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: blt a0, a1, .LBB20_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sub a0, a0, a1 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB20_2: -; RV32ZBB-NEXT: sub a0, a1, a0 +; RV32ZBB-NEXT: min a2, a0, a1 +; RV32ZBB-NEXT: max a0, a0, a1 +; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sext.w a2, a1 -; RV64ZBB-NEXT: sext.w a3, a0 -; RV64ZBB-NEXT: blt a3, a2, .LBB20_2 -; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: subw a0, a0, a1 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB20_2: -; RV64ZBB-NEXT: subw a0, a1, a0 +; RV64ZBB-NEXT: sext.w a1, a1 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: min a2, a0, a1 +; RV64ZBB-NEXT: max a0, a0, a1 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp slt i32 %a, %b %ab = sub i32 %a, %b diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll index 6b121af7e4e84f6..bcacdf44ab10301 100644 --- a/llvm/test/CodeGen/RISCV/abdu-neg.ll +++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll @@ -1740,63 +1740,62 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: abd_cmp_i64: ; RV32I: # %bb.0: ; RV32I-NEXT: sltu a4, a0, a2 -; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: beq a1, a3, .LBB21_2 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a2, a0, a2 +; RV32I-NEXT: beq a3, a1, .LBB21_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a5, a1, a3 +; RV32I-NEXT: sltu a0, a1, a3 +; 
RV32I-NEXT: j .LBB21_3 ; RV32I-NEXT: .LBB21_2: -; RV32I-NEXT: bnez a5, .LBB21_4 -; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sub a0, a0, a2 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB21_4: -; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: sub a1, a3, a1 +; RV32I-NEXT: sltu a0, a0, a2 +; RV32I-NEXT: .LBB21_3: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: xor a2, a2, a1 +; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: xor a1, a3, a1 +; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: add a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a0, a1, .LBB21_2 +; RV64I-NEXT: bltu a1, a0, .LBB21_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB21_2: -; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i64: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: sltu a4, a0, a2 -; RV32ZBB-NEXT: mv a5, a4 -; RV32ZBB-NEXT: beq a1, a3, .LBB21_2 +; RV32ZBB-NEXT: sub a3, a1, a3 +; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sub a2, a0, a2 +; RV32ZBB-NEXT: beq a3, a1, .LBB21_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a5, a1, a3 +; RV32ZBB-NEXT: sltu a0, a1, a3 +; RV32ZBB-NEXT: j .LBB21_3 ; RV32ZBB-NEXT: .LBB21_2: -; RV32ZBB-NEXT: bnez a5, .LBB21_4 -; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sub a0, a0, a2 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB21_4: -; RV32ZBB-NEXT: sltu a4, a2, a0 -; RV32ZBB-NEXT: sub a1, a3, a1 +; RV32ZBB-NEXT: sltu a0, a0, a2 +; RV32ZBB-NEXT: .LBB21_3: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: xor a2, a2, a1 +; RV32ZBB-NEXT: sltu a4, a2, a1 +; RV32ZBB-NEXT: xor a1, a3, a1 +; RV32ZBB-NEXT: add a1, a1, a0 ; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sub a0, a2, a0 +; RV32ZBB-NEXT: add a0, a2, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: bltu a0, a1, .LBB21_2 -; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: sub a0, a0, a1 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB21_2: -; RV64ZBB-NEXT: sub a0, a1, a0 +; RV64ZBB-NEXT: minu a2, a0, a1 +; RV64ZBB-NEXT: maxu a0, a0, a1 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp ult i64 %a, %b %ab = sub i64 %a, %b @@ -1808,176 +1807,194 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw a7, 8(a1) -; RV32I-NEXT: lw a2, 12(a2) -; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw t0, 4(a2) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a7, a6 -; RV32I-NEXT: mv t4, t1 -; RV32I-NEXT: beq t0, a2, .LBB22_2 +; RV32I-NEXT: sltu a2, a4, a7 +; RV32I-NEXT: sub t1, a6, t1 +; RV32I-NEXT: sltu t2, a3, a5 +; RV32I-NEXT: sub a2, t1, a2 +; RV32I-NEXT: mv t1, t2 +; RV32I-NEXT: beq a1, t0, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t4, t0, a2 +; RV32I-NEXT: sltu t1, a1, t0 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sltu t2, a4, a3 -; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a1, a5, .LBB22_4 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t3, a7, t1 +; RV32I-NEXT: sub a2, a2, t3 +; RV32I-NEXT: sub a7, a7, 
t1 +; RV32I-NEXT: beq a2, a6, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t3, a1, a5 +; RV32I-NEXT: sltu t1, a6, a2 +; RV32I-NEXT: j .LBB22_5 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t0, a2 -; RV32I-NEXT: xor t6, a7, a6 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: mv t6, t3 -; RV32I-NEXT: beqz t5, .LBB22_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv t6, t4 -; RV32I-NEXT: .LBB22_6: -; RV32I-NEXT: sltu t4, a3, a4 -; RV32I-NEXT: mv t5, t4 -; RV32I-NEXT: beq a1, a5, .LBB22_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: sltu t5, a5, a1 +; RV32I-NEXT: sltu t1, a4, a7 +; RV32I-NEXT: .LBB22_5: +; RV32I-NEXT: sub t0, a1, t0 +; RV32I-NEXT: sub t0, t0, t2 +; RV32I-NEXT: sub a5, a3, a5 +; RV32I-NEXT: beq t0, a1, .LBB22_7 +; RV32I-NEXT: # %bb.6: +; RV32I-NEXT: sltu a1, a1, t0 +; RV32I-NEXT: j .LBB22_8 +; RV32I-NEXT: .LBB22_7: +; RV32I-NEXT: sltu a1, a3, a5 ; RV32I-NEXT: .LBB22_8: -; RV32I-NEXT: bnez t6, .LBB22_10 +; RV32I-NEXT: xor a3, a2, a6 +; RV32I-NEXT: xor a4, a7, a4 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: beqz a3, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sub a2, t0, a2 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a2, a2, t1 -; RV32I-NEXT: sltu a7, a6, t3 -; RV32I-NEXT: sub a1, a1, a5 -; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a6, a6, t3 -; RV32I-NEXT: sub a1, a1, t2 -; RV32I-NEXT: sub a3, a4, a3 -; RV32I-NEXT: j .LBB22_11 +; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sltu t1, a6, a7 -; RV32I-NEXT: sub a2, a2, t0 -; RV32I-NEXT: sub a2, a2, t1 -; RV32I-NEXT: sub a6, a6, a7 -; RV32I-NEXT: sltu a7, a6, t5 -; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a6, a6, t5 -; RV32I-NEXT: sub a5, a5, a1 -; RV32I-NEXT: sub a1, a5, t4 -; RV32I-NEXT: sub a3, a3, a4 -; RV32I-NEXT: .LBB22_11: -; RV32I-NEXT: sw a6, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) -; RV32I-NEXT: sw a3, 0(a0) -; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: neg a6, a1 +; RV32I-NEXT: xor a3, a7, a6 +; RV32I-NEXT: sltu a4, a3, a6 +; RV32I-NEXT: xor a2, a2, a6 +; RV32I-NEXT: add a2, a2, a1 +; RV32I-NEXT: sub a4, a2, a4 +; RV32I-NEXT: xor a2, a5, a6 +; RV32I-NEXT: sltu a5, a2, a6 +; RV32I-NEXT: xor a7, t0, a6 +; RV32I-NEXT: mv t1, a5 +; RV32I-NEXT: beqz t0, .LBB22_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sltu t1, a7, a6 +; RV32I-NEXT: .LBB22_12: +; RV32I-NEXT: add a3, a3, a1 +; RV32I-NEXT: sltu a6, a3, t1 +; RV32I-NEXT: sub a4, a4, a6 +; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: add a7, a7, a1 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: ; RV64I: # %bb.0: ; RV64I-NEXT: sltu a4, a0, a2 -; RV64I-NEXT: mv a5, a4 -; RV64I-NEXT: beq a1, a3, .LBB22_2 +; RV64I-NEXT: sub a3, a1, a3 +; RV64I-NEXT: sub a3, a3, a4 +; RV64I-NEXT: sub a2, a0, a2 +; RV64I-NEXT: beq a3, a1, .LBB22_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sltu a5, a1, a3 +; RV64I-NEXT: sltu a0, a1, a3 +; RV64I-NEXT: j .LBB22_3 ; RV64I-NEXT: .LBB22_2: -; RV64I-NEXT: bnez a5, .LBB22_4 -; RV64I-NEXT: # %bb.3: -; RV64I-NEXT: sub a1, a1, a3 -; RV64I-NEXT: sub a1, a1, a4 -; RV64I-NEXT: sub a0, a0, a2 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB22_4: -; RV64I-NEXT: sltu a4, a2, a0 -; RV64I-NEXT: sub a1, a3, a1 +; RV64I-NEXT: sltu a0, a0, a2 +; RV64I-NEXT: .LBB22_3: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: xor a2, a2, a1 +; RV64I-NEXT: sltu a4, a2, a1 +; RV64I-NEXT: xor a1, a3, a1 +; RV64I-NEXT: add a1, a1, a0 ; RV64I-NEXT: sub a1, a1, a4 -; RV64I-NEXT: sub a0, a2, a0 +; RV64I-NEXT: add a0, a2, a0 ; 
RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 0(a1) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 8(a1) -; RV32ZBB-NEXT: lw a2, 12(a2) -; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a6, 12(a1) +; RV32ZBB-NEXT: lw t0, 4(a2) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a7, a6 -; RV32ZBB-NEXT: mv t4, t1 -; RV32ZBB-NEXT: beq t0, a2, .LBB22_2 +; RV32ZBB-NEXT: sltu a2, a4, a7 +; RV32ZBB-NEXT: sub t1, a6, t1 +; RV32ZBB-NEXT: sltu t2, a3, a5 +; RV32ZBB-NEXT: sub a2, t1, a2 +; RV32ZBB-NEXT: mv t1, t2 +; RV32ZBB-NEXT: beq a1, t0, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t4, t0, a2 +; RV32ZBB-NEXT: sltu t1, a1, t0 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sltu t2, a4, a3 -; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_4 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t3, a7, t1 +; RV32ZBB-NEXT: sub a2, a2, t3 +; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: beq a2, a6, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t3, a1, a5 +; RV32ZBB-NEXT: sltu t1, a6, a2 +; RV32ZBB-NEXT: j .LBB22_5 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t0, a2 -; RV32ZBB-NEXT: xor t6, a7, a6 -; RV32ZBB-NEXT: or t5, t6, t5 -; RV32ZBB-NEXT: mv t6, t3 -; RV32ZBB-NEXT: beqz t5, .LBB22_6 -; RV32ZBB-NEXT: # %bb.5: -; RV32ZBB-NEXT: mv t6, t4 -; RV32ZBB-NEXT: .LBB22_6: -; RV32ZBB-NEXT: sltu t4, a3, a4 -; RV32ZBB-NEXT: mv t5, t4 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_8 -; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: sltu t5, a5, a1 +; RV32ZBB-NEXT: sltu t1, a4, a7 +; RV32ZBB-NEXT: .LBB22_5: +; RV32ZBB-NEXT: sub t0, a1, t0 +; RV32ZBB-NEXT: sub t0, t0, t2 +; RV32ZBB-NEXT: sub a5, a3, a5 +; RV32ZBB-NEXT: beq t0, a1, .LBB22_7 +; RV32ZBB-NEXT: # %bb.6: +; RV32ZBB-NEXT: sltu a1, a1, t0 +; RV32ZBB-NEXT: j .LBB22_8 +; RV32ZBB-NEXT: .LBB22_7: +; RV32ZBB-NEXT: sltu a1, a3, a5 ; RV32ZBB-NEXT: .LBB22_8: -; RV32ZBB-NEXT: bnez t6, .LBB22_10 +; RV32ZBB-NEXT: xor a3, a2, a6 +; RV32ZBB-NEXT: xor a4, a7, a4 +; RV32ZBB-NEXT: or a3, a4, a3 +; RV32ZBB-NEXT: beqz a3, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sub a2, t0, a2 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a2, a2, t1 -; RV32ZBB-NEXT: sltu a7, a6, t3 -; RV32ZBB-NEXT: sub a1, a1, a5 -; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a6, a6, t3 -; RV32ZBB-NEXT: sub a1, a1, t2 -; RV32ZBB-NEXT: sub a3, a4, a3 -; RV32ZBB-NEXT: j .LBB22_11 +; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sltu t1, a6, a7 -; RV32ZBB-NEXT: sub a2, a2, t0 -; RV32ZBB-NEXT: sub a2, a2, t1 -; RV32ZBB-NEXT: sub a6, a6, a7 -; RV32ZBB-NEXT: sltu a7, a6, t5 -; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a6, a6, t5 -; RV32ZBB-NEXT: sub a5, a5, a1 -; RV32ZBB-NEXT: sub a1, a5, t4 -; RV32ZBB-NEXT: sub a3, a3, a4 -; RV32ZBB-NEXT: .LBB22_11: -; RV32ZBB-NEXT: sw a6, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) -; RV32ZBB-NEXT: sw a3, 0(a0) -; RV32ZBB-NEXT: sw a2, 12(a0) +; RV32ZBB-NEXT: neg a6, a1 +; RV32ZBB-NEXT: xor a3, a7, a6 +; RV32ZBB-NEXT: sltu a4, a3, a6 +; RV32ZBB-NEXT: xor a2, a2, a6 +; RV32ZBB-NEXT: add a2, a2, a1 +; RV32ZBB-NEXT: sub a4, a2, a4 +; RV32ZBB-NEXT: xor a2, a5, a6 +; RV32ZBB-NEXT: sltu a5, a2, a6 +; RV32ZBB-NEXT: xor a7, t0, a6 +; RV32ZBB-NEXT: mv t1, a5 +; RV32ZBB-NEXT: beqz t0, .LBB22_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sltu t1, a7, a6 +; 
RV32ZBB-NEXT: .LBB22_12: +; RV32ZBB-NEXT: add a3, a3, a1 +; RV32ZBB-NEXT: sltu a6, a3, t1 +; RV32ZBB-NEXT: sub a4, a4, a6 +; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: add a7, a7, a1 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: add a1, a2, a1 +; RV32ZBB-NEXT: sw a1, 0(a0) +; RV32ZBB-NEXT: sw a5, 4(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sltu a4, a0, a2 -; RV64ZBB-NEXT: mv a5, a4 -; RV64ZBB-NEXT: beq a1, a3, .LBB22_2 +; RV64ZBB-NEXT: sub a3, a1, a3 +; RV64ZBB-NEXT: sub a3, a3, a4 +; RV64ZBB-NEXT: sub a2, a0, a2 +; RV64ZBB-NEXT: beq a3, a1, .LBB22_2 ; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: sltu a5, a1, a3 +; RV64ZBB-NEXT: sltu a0, a1, a3 +; RV64ZBB-NEXT: j .LBB22_3 ; RV64ZBB-NEXT: .LBB22_2: -; RV64ZBB-NEXT: bnez a5, .LBB22_4 -; RV64ZBB-NEXT: # %bb.3: -; RV64ZBB-NEXT: sub a1, a1, a3 -; RV64ZBB-NEXT: sub a1, a1, a4 -; RV64ZBB-NEXT: sub a0, a0, a2 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB22_4: -; RV64ZBB-NEXT: sltu a4, a2, a0 -; RV64ZBB-NEXT: sub a1, a3, a1 +; RV64ZBB-NEXT: sltu a0, a0, a2 +; RV64ZBB-NEXT: .LBB22_3: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: xor a2, a2, a1 +; RV64ZBB-NEXT: sltu a4, a2, a1 +; RV64ZBB-NEXT: xor a1, a3, a1 +; RV64ZBB-NEXT: add a1, a1, a0 ; RV64ZBB-NEXT: sub a1, a1, a4 -; RV64ZBB-NEXT: sub a0, a2, a0 +; RV64ZBB-NEXT: add a0, a2, a0 ; RV64ZBB-NEXT: ret %cmp = icmp ult i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/RISCV/abdu.ll b/llvm/test/CodeGen/RISCV/abdu.ll index 0730b9b350863e6..39aef369a29672a 100644 --- a/llvm/test/CodeGen/RISCV/abdu.ll +++ b/llvm/test/CodeGen/RISCV/abdu.ll @@ -1331,17 +1331,34 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { -; CHECK-LABEL: abd_cmp_i8: -; CHECK: # %bb.0: -; CHECK-NEXT: andi a2, a0, 255 -; CHECK-NEXT: andi a3, a1, 255 -; CHECK-NEXT: bltu a3, a2, .LBB18_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sub a0, a1, a0 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB18_2: -; CHECK-NEXT: sub a0, a0, a1 -; CHECK-NEXT: ret +; RV32I-LABEL: abd_cmp_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a1, a1, 255 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: abd_cmp_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a1, a1, 255 +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: ret +; +; ZBB-LABEL: abd_cmp_i8: +; ZBB: # %bb.0: +; ZBB-NEXT: andi a1, a1, 255 +; ZBB-NEXT: andi a0, a0, 255 +; ZBB-NEXT: minu a2, a0, a1 +; ZBB-NEXT: maxu a0, a0, a1 +; ZBB-NEXT: sub a0, a0, a2 +; ZBB-NEXT: ret %cmp = icmp ugt i8 %a, %b %ab = sub i8 %a, %b %ba = sub i8 %b, %a @@ -1354,13 +1371,11 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 16 ; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a3, a1, a2 -; RV32I-NEXT: and a2, a0, a2 -; RV32I-NEXT: bgeu a2, a3, .LBB19_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a1, a0 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB19_2: +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; @@ -1368,26 +1383,21 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, 16 ; RV64I-NEXT: 
addiw a2, a2, -1 -; RV64I-NEXT: and a3, a1, a2 -; RV64I-NEXT: and a2, a0, a2 -; RV64I-NEXT: bgeu a2, a3, .LBB19_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a0, a1, a0 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB19_2: +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_cmp_i16: ; ZBB: # %bb.0: -; ZBB-NEXT: zext.h a2, a1 -; ZBB-NEXT: zext.h a3, a0 -; ZBB-NEXT: bgeu a3, a2, .LBB19_2 -; ZBB-NEXT: # %bb.1: -; ZBB-NEXT: sub a0, a1, a0 -; ZBB-NEXT: ret -; ZBB-NEXT: .LBB19_2: -; ZBB-NEXT: sub a0, a0, a1 +; ZBB-NEXT: zext.h a1, a1 +; ZBB-NEXT: zext.h a0, a0 +; ZBB-NEXT: minu a2, a0, a1 +; ZBB-NEXT: maxu a0, a0, a1 +; ZBB-NEXT: sub a0, a0, a2 ; ZBB-NEXT: ret %cmp = icmp uge i16 %a, %b %ab = sub i16 %a, %b @@ -1399,46 +1409,42 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: abd_cmp_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: bltu a0, a1, .LBB20_2 +; RV32I-NEXT: bltu a1, a0, .LBB20_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB20_2: -; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a2, a1 -; RV64I-NEXT: sext.w a3, a0 -; RV64I-NEXT: bltu a3, a2, .LBB20_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a0, a0, a1 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB20_2: -; RV64I-NEXT: subw a0, a1, a0 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: bltu a0, a1, .LBB20_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sub a0, a0, a1 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB20_2: -; RV32ZBB-NEXT: sub a0, a1, a0 +; RV32ZBB-NEXT: minu a2, a0, a1 +; RV32ZBB-NEXT: maxu a0, a0, a1 +; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sext.w a2, a1 -; RV64ZBB-NEXT: sext.w a3, a0 -; RV64ZBB-NEXT: bltu a3, a2, .LBB20_2 -; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: subw a0, a0, a1 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB20_2: -; RV64ZBB-NEXT: subw a0, a1, a0 +; RV64ZBB-NEXT: slli a1, a1, 32 +; RV64ZBB-NEXT: srli a1, a1, 32 +; RV64ZBB-NEXT: slli a0, a0, 32 +; RV64ZBB-NEXT: srli a0, a0, 32 +; RV64ZBB-NEXT: minu a2, a0, a1 +; RV64ZBB-NEXT: maxu a0, a0, a1 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp ult i32 %a, %b %ab = sub i32 %a, %b @@ -1713,4 +1719,5 @@ declare i16 @llvm.umin.i16(i16, i16) declare i32 @llvm.umin.i32(i32, i32) declare i64 @llvm.umin.i64(i64, i64) ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} ; NOZBB: {{.*}} diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll index b72bb46209d2d66..246cd8e0e852d5e 100644 --- a/llvm/test/CodeGen/X86/abds-neg.ll +++ b/llvm/test/CodeGen/X86/abds-neg.ll @@ -853,9 +853,8 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: subq %rsi, %rax -; X64-NEXT: negq %rax -; X64-NEXT: subq %rsi, %rdi -; X64-NEXT: cmovgeq %rdi, %rax +; X64-NEXT: subq %rdi, %rsi +; X64-NEXT: cmovgeq %rsi, %rax ; X64-NEXT: retq %cmp = icmp slt i64 %a, %b %ab = sub i64 %a, %b @@ -907,14 +906,14 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; ; X64-LABEL: abd_cmp_i128: ; X64: # %bb.0: -; X64-NEXT: movq %rdx, %rax -; X64-NEXT: subq %rdi, %rax -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: sbbq %rsi, %r8 -; X64-NEXT: subq %rdx, %rdi -; X64-NEXT: sbbq %rcx, %rsi -; X64-NEXT: cmovgeq %rdi, %rax -; X64-NEXT: cmovgeq %rsi, %r8 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rdx, %rax +; X64-NEXT: movq %rsi, %r8 +; X64-NEXT: sbbq %rcx, %r8 +; X64-NEXT: subq %rdi, %rdx +; X64-NEXT: sbbq %rsi, %rcx +; X64-NEXT: cmovgeq %rdx, %rax +; X64-NEXT: cmovgeq %rcx, %r8 ; X64-NEXT: movq %r8, %rdx ; X64-NEXT: retq %cmp = icmp slt i128 %a, %b diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll index e972ef3787e4d59..9f3b99b349aedac 100644 --- a/llvm/test/CodeGen/X86/abds.ll +++ b/llvm/test/CodeGen/X86/abds.ll @@ -648,27 +648,23 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_cmp_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: subb %cl, %dl -; X86-NEXT: negb %dl -; X86-NEXT: subb %cl, %al -; X86-NEXT: movzbl %al, %ecx -; X86-NEXT: movzbl %dl, %eax -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: cmovsl %ecx, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_cmp_i8: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: subb %sil, %al -; X64-NEXT: negb %al -; X64-NEXT: subb %sil, %dil -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: movsbl %sil, %eax +; X64-NEXT: movsbl %dil, %ecx +; X64-NEXT: subl %eax, %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmovsl %ecx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %cmp = icmp sgt i8 %a, %b @@ -681,27 +677,23 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_cmp_i16: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: subw %dx, %si -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: negl %eax -; X86-NEXT: cmpw %dx, %cx -; X86-NEXT: cmovgel %esi, %eax +; X86-NEXT: cmovsl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: abd_cmp_i16: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: subw %si, %cx +; X64-NEXT: movswl %si, %eax +; 
X64-NEXT: movswl %di, %ecx +; X64-NEXT: subl %eax, %ecx ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: negl %eax -; X64-NEXT: cmpw %si, %di -; X64-NEXT: cmovgel %ecx, %eax +; X64-NEXT: cmovsl %ecx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %cmp = icmp sge i16 %a, %b @@ -716,9 +708,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: subl %ecx, %edx -; X86-NEXT: negl %edx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: subl %eax, %edx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: cmovll %edx, %eax ; X86-NEXT: retl @@ -727,9 +718,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: subl %esi, %eax -; X64-NEXT: negl %eax -; X64-NEXT: subl %esi, %edi -; X64-NEXT: cmovgel %edi, %eax +; X64-NEXT: subl %edi, %esi +; X64-NEXT: cmovgel %esi, %eax ; X64-NEXT: retq %cmp = icmp slt i32 %a, %b %ab = sub i32 %a, %b diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll index e808e0f21babf28..c8fa19cb661b6bb 100644 --- a/llvm/test/CodeGen/X86/abdu.ll +++ b/llvm/test/CodeGen/X86/abdu.ll @@ -639,27 +639,23 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_cmp_i16: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: subw %dx, %si -; X86-NEXT: movl %esi, %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: negl %eax -; X86-NEXT: cmpw %dx, %cx -; X86-NEXT: cmovael %esi, %eax +; X86-NEXT: cmovsl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: abd_cmp_i16: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: subw %si, %cx +; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzwl %di, %ecx +; X64-NEXT: subl %eax, %ecx ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: negl %eax -; X64-NEXT: cmpw %si, %di -; X64-NEXT: cmovael %ecx, %eax +; X64-NEXT: cmovsl %ecx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %cmp = icmp uge i16 %a, %b
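For reference, all of the abd_cmp_* tests updated above share the same sub/sub/select shape that the foldSelectToABD call added to SimplifySelectCC is intended to recognize. Below is a minimal IR sketch of that shape, reconstructed from abd_cmp_i32 in llvm/test/CodeGen/AArch64/abds.ll; the trailing select and ret lines are inferred, since the diff context stops after the second sub, so treat this as illustrative rather than a verbatim copy of the test:

define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
  %cmp = icmp slt i32 %a, %b               ; a < b
  %ab  = sub i32 %a, %b                    ; a - b
  %ba  = sub i32 %b, %a                    ; b - a
  %sel = select i1 %cmp, i32 %ba, i32 %ab  ; picks the non-negative difference (inferred select arm order)
  ret i32 %sel
}

Once SimplifySelectCC folds this select into an ISD::ABDS/ABDU node, each target can select its native absolute-difference idiom, which is what the regenerated CHECK lines reflect: min/max plus sub under RISC-V Zbb, cmp/cneg for the narrow AArch64 cases, and v_sad_u32 on AMDGPU.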