Skip to content

Commit

Permalink
8308915: RISC-V: Improve temporary vector register usage avoiding the…
Browse files Browse the repository at this point in the history
… use of v0

Reviewed-by: yzhu, fyang
  • Loading branch information
DingliZhang authored and RealFYang committed May 29, 2023
1 parent 547a8b4 commit e21f865
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 64 deletions.
39 changes: 20 additions & 19 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1377,7 +1377,7 @@ void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register resul
srli(cnt, cnt, 1);
}

element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE);
element_compare(a1, a2, result, cnt, tmp1, tmp2, v2, v4, v2, elem_size == 1, DONE);

bind(DONE);
BLOCK_COMMENT("} string_equals_v");
Expand All @@ -1387,17 +1387,17 @@ void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register resul
// base: Address of a buffer to be zeroed
// cnt: Count in HeapWords
//
// base, cnt, v0, v1 and t0 are clobbered.
// base, cnt, v4, v5, v6, v7 and t0 are clobbered.
void C2_MacroAssembler::clear_array_v(Register base, Register cnt) {
Label loop;

// making zero words
vsetvli(t0, cnt, Assembler::e64, Assembler::m4);
vxor_vv(v0, v0, v0);
vxor_vv(v4, v4, v4);

bind(loop);
vsetvli(t0, cnt, Assembler::e64, Assembler::m4);
vse64_v(v0, base);
vse64_v(v4, base);
sub(cnt, cnt, t0);
shadd(base, t0, base, t0, 3);
bnez(cnt, loop);
Expand Down Expand Up @@ -1430,7 +1430,7 @@ void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register resul
la(a1, Address(a1, base_offset));
la(a2, Address(a2, base_offset));

element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE);
element_compare(a1, a2, result, cnt1, tmp1, tmp2, v2, v4, v2, elem_size == 1, DONE);

bind(DONE);

Expand Down Expand Up @@ -1466,29 +1466,30 @@ void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register
bind(L);

if (str1_isL == str2_isL) { // LL or UU
element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE);
element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v2, encLL, DIFFERENCE);
j(DONE);
} else { // LU or UL
Register strL = encLU ? str1 : str2;
Register strU = encLU ? str2 : str1;
VectorRegister vstr1 = encLU ? v4 : v0;
VectorRegister vstr2 = encLU ? v0 : v4;
VectorRegister vstr1 = encLU ? v8 : v4;
VectorRegister vstr2 = encLU ? v4 : v8;

bind(loop);
vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2);
vle8_v(vstr1, strL);
vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4);
vzext_vf2(vstr2, vstr1);
vle16_v(vstr1, strU);
vmsne_vv(v0, vstr2, vstr1);
vfirst_m(tmp2, v0);
vmsne_vv(v4, vstr2, vstr1);
vfirst_m(tmp2, v4);
bgez(tmp2, DIFFERENCE);
sub(cnt2, cnt2, tmp1);
add(strL, strL, tmp1);
shadd(strU, tmp1, strU, tmp1, 1);
bnez(cnt2, loop);
j(DONE);
}

bind(DIFFERENCE);
slli(tmp1, tmp2, 1);
add(str1, str1, str1_isL ? tmp2 : tmp1);
Expand All @@ -1507,10 +1508,10 @@ void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Registe
BLOCK_COMMENT("byte_array_inflate_v {");
bind(loop);
vsetvli(tmp, len, Assembler::e8, Assembler::m2);
vle8_v(v2, src);
vle8_v(v6, src);
vsetvli(t0, len, Assembler::e16, Assembler::m4);
vzext_vf2(v0, v2);
vse16_v(v0, dst);
vzext_vf2(v4, v6);
vse16_v(v4, dst);
sub(len, len, tmp);
add(src, src, tmp);
shadd(dst, tmp, dst, tmp, 1);
Expand Down Expand Up @@ -1573,9 +1574,9 @@ void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register r

bind(LOOP);
vsetvli(t0, len, Assembler::e8, Assembler::m4);
vle8_v(v0, ary);
vmslt_vx(v0, v0, zr);
vfirst_m(tmp, v0);
vle8_v(v4, ary);
vmslt_vx(v4, v4, zr);
vfirst_m(tmp, v4);
bgez(tmp, SET_RESULT);
// if tmp == -1, all bytes are positive
add(result, result, t0);
Expand Down Expand Up @@ -1603,9 +1604,9 @@ void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1,
Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16;
bind(loop);
vsetvli(tmp1, cnt1, sew, Assembler::m4);
vlex_v(v0, str1, sew);
vmseq_vx(v0, v0, ch);
vfirst_m(tmp2, v0);
vlex_v(v4, str1, sew);
vmseq_vx(v4, v4, ch);
vfirst_m(tmp2, v4);
bgez(tmp2, MATCH); // if equal, return index

add(result, result, tmp1);
Expand Down
68 changes: 64 additions & 4 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,26 @@ reg_class v7_reg(
V7, V7_H, V7_J, V7_K
);

// class for vector register v8
// Lists only the V8 entries (V8, V8_H, V8_J, V8_K); referenced by operand
// vReg_V8 through ALLOC_IN_RC(v8_reg) to pin a value to this register.
reg_class v8_reg(
V8, V8_H, V8_J, V8_K
);

// class for vector register v9
// Lists only the V9 entries (V9, V9_H, V9_J, V9_K); referenced by operand
// vReg_V9 through ALLOC_IN_RC(v9_reg) to pin a value to this register.
reg_class v9_reg(
V9, V9_H, V9_J, V9_K
);

// class for vector register v10
// Lists only the V10 entries (V10, V10_H, V10_J, V10_K); referenced by operand
// vReg_V10 through ALLOC_IN_RC(v10_reg) to pin a value to this register.
reg_class v10_reg(
V10, V10_H, V10_J, V10_K
);

// class for vector register v11
// Lists only the V11 entries (V11, V11_H, V11_J, V11_K); referenced by operand
// vReg_V11 through ALLOC_IN_RC(v11_reg) to pin a value to this register.
reg_class v11_reg(
V11, V11_H, V11_J, V11_K
);

// class for condition codes
reg_class reg_flags(RFLAGS);

Expand Down Expand Up @@ -3628,6 +3648,46 @@ operand vReg_V7()
interface(REG_INTER);
%}

// Vector operand fixed to register v8: the constraint below restricts
// allocation to the register class v8_reg, which contains only the V8 entries.
// It carries match rules for VecA and vReg, has zero operand cost, an empty
// format string, and uses the plain register interface (REG_INTER).
operand vReg_V8()
%{
constraint(ALLOC_IN_RC(v8_reg));
match(VecA);
match(vReg);
op_cost(0);
format %{ %}
interface(REG_INTER);
%}

// Vector operand fixed to register v9: the constraint below restricts
// allocation to the register class v9_reg, which contains only the V9 entries.
// It carries match rules for VecA and vReg, has zero operand cost, an empty
// format string, and uses the plain register interface (REG_INTER).
operand vReg_V9()
%{
constraint(ALLOC_IN_RC(v9_reg));
match(VecA);
match(vReg);
op_cost(0);
format %{ %}
interface(REG_INTER);
%}

// Vector operand fixed to register v10: the constraint below restricts
// allocation to the register class v10_reg, which contains only the V10 entries.
// It carries match rules for VecA and vReg, has zero operand cost, an empty
// format string, and uses the plain register interface (REG_INTER).
operand vReg_V10()
%{
constraint(ALLOC_IN_RC(v10_reg));
match(VecA);
match(vReg);
op_cost(0);
format %{ %}
interface(REG_INTER);
%}

// Vector operand fixed to register v11: the constraint below restricts
// allocation to the register class v11_reg, which contains only the V11 entries.
// It carries match rules for VecA and vReg, has zero operand cost, an empty
// format string, and uses the plain register interface (REG_INTER).
operand vReg_V11()
%{
constraint(ALLOC_IN_RC(v11_reg));
match(VecA);
match(vReg);
op_cost(0);
format %{ %}
interface(REG_INTER);
%}

operand vRegMask()
%{
constraint(ALLOC_IN_RC(vmask_reg));
Expand Down Expand Up @@ -9940,7 +10000,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R
iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
%{
predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
Expand All @@ -9958,7 +10018,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R
iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
%{
predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
Expand All @@ -9975,7 +10035,7 @@ instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_
iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
%{
predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
Expand All @@ -9993,7 +10053,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_
rFlagsReg cr)
%{
predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);
match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
Expand Down
Loading

1 comment on commit e21f865

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.