Skip to content

Commit

Permalink
AMDGPU: Add mode register use to s_getreg_b32
Browse files Browse the repository at this point in the history
This should fix reading the wrong mode after setting the mode.
Ideally we would have separate pseudos for the case that we know
does not read mode.
  • Loading branch information
arsenm committed May 7, 2024
1 parent 9eb91f4 commit f548c4d
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 24 deletions.
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1110,14 +1110,15 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <

// This is hasSideEffects to allow its use in readcyclecounter selection.
// FIXME: Need to truncate immediate to 16-bits.
// FIXME: Missing mode register use. Should have separate pseudos for
// known may read MODE and only read MODE.
// FIXME: Should have separate pseudos for known may read MODE and
// only read MODE.
// Pseudo for s_getreg_b32. Its pattern selects the int_amdgcn_s_getreg
// intrinsic, passing the hwreg selector as a target immediate ($simm16) and
// writing the i32 result to $sdst.
def S_GETREG_B32 : SOPK_Pseudo <
"s_getreg_b32",
(outs SReg_32:$sdst), (ins hwreg:$simm16),
"$sdst, $simm16",
[(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
let hasSideEffects = 1;
// Declare an implicit MODE use on every s_getreg_b32, regardless of which
// hwreg is selected, so a read of the mode is not observed after a later
// write to MODE. This is conservative for reads of other hardware registers.
let Uses = [MODE];
}

let Defs = [MODE], Uses = [MODE] in {
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2417,12 +2417,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -2455,12 +2455,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -2727,12 +2727,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -2765,12 +2765,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -3294,12 +3294,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4
; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -3334,12 +3334,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v4, v3
; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0
; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4
; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4
; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6
; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -3868,12 +3868,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -3906,12 +3906,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -4434,12 +4434,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4
; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -4474,12 +4474,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v4, v3
; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0
; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4
; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4
; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6
; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -5010,12 +5010,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -5048,12 +5048,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -5569,12 +5569,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -5607,12 +5607,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -6113,12 +6113,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -6153,12 +6153,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -6619,12 +6619,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -6659,12 +6659,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -7168,12 +7168,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -7206,12 +7206,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -7721,12 +7721,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down Expand Up @@ -7759,12 +7759,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
Expand Down
Loading

0 comments on commit f548c4d

Please sign in to comment.