Skip to content

Commit

Permalink
RISC-V: Add tuple types support
Browse files Browse the repository at this point in the history
gcc/ChangeLog:

	* config/riscv/riscv-modes.def (RVV_TUPLE_MODES): New macro.
	(RVV_TUPLE_PARTIAL_MODES): Ditto.
	* config/riscv/riscv-protos.h (riscv_v_ext_tuple_mode_p): New
	function.
	(get_nf): Ditto.
	(get_subpart_mode): Ditto.
	(get_tuple_mode): Ditto.
	(expand_tuple_move): Ditto.
	* config/riscv/riscv-v.cc (ENTRY): New macro.
	(TUPLE_ENTRY): Ditto.
	(get_nf): New function.
	(get_subpart_mode): Ditto.
	(get_tuple_mode): Ditto.
	(expand_tuple_move): Ditto.
	* config/riscv/riscv-vector-builtins.cc (DEF_RVV_TUPLE_TYPE):
	New macro.
	(register_tuple_type): New function.
	* config/riscv/riscv-vector-builtins.def (DEF_RVV_TUPLE_TYPE):
	New macro.
	(vint8mf8x2_t): New macro.
	(vuint8mf8x2_t): Ditto.
	(vint8mf8x3_t): Ditto.
	(vuint8mf8x3_t): Ditto.
	(vint8mf8x4_t): Ditto.
	(vuint8mf8x4_t): Ditto.
	(vint8mf8x5_t): Ditto.
	(vuint8mf8x5_t): Ditto.
	(vint8mf8x6_t): Ditto.
	(vuint8mf8x6_t): Ditto.
	(vint8mf8x7_t): Ditto.
	(vuint8mf8x7_t): Ditto.
	(vint8mf8x8_t): Ditto.
	(vuint8mf8x8_t): Ditto.
	(vint8mf4x2_t): Ditto.
	(vuint8mf4x2_t): Ditto.
	(vint8mf4x3_t): Ditto.
	(vuint8mf4x3_t): Ditto.
	(vint8mf4x4_t): Ditto.
	(vuint8mf4x4_t): Ditto.
	(vint8mf4x5_t): Ditto.
	(vuint8mf4x5_t): Ditto.
	(vint8mf4x6_t): Ditto.
	(vuint8mf4x6_t): Ditto.
	(vint8mf4x7_t): Ditto.
	(vuint8mf4x7_t): Ditto.
	(vint8mf4x8_t): Ditto.
	(vuint8mf4x8_t): Ditto.
	(vint8mf2x2_t): Ditto.
	(vuint8mf2x2_t): Ditto.
	(vint8mf2x3_t): Ditto.
	(vuint8mf2x3_t): Ditto.
	(vint8mf2x4_t): Ditto.
	(vuint8mf2x4_t): Ditto.
	(vint8mf2x5_t): Ditto.
	(vuint8mf2x5_t): Ditto.
	(vint8mf2x6_t): Ditto.
	(vuint8mf2x6_t): Ditto.
	(vint8mf2x7_t): Ditto.
	(vuint8mf2x7_t): Ditto.
	(vint8mf2x8_t): Ditto.
	(vuint8mf2x8_t): Ditto.
	(vint8m1x2_t): Ditto.
	(vuint8m1x2_t): Ditto.
	(vint8m1x3_t): Ditto.
	(vuint8m1x3_t): Ditto.
	(vint8m1x4_t): Ditto.
	(vuint8m1x4_t): Ditto.
	(vint8m1x5_t): Ditto.
	(vuint8m1x5_t): Ditto.
	(vint8m1x6_t): Ditto.
	(vuint8m1x6_t): Ditto.
	(vint8m1x7_t): Ditto.
	(vuint8m1x7_t): Ditto.
	(vint8m1x8_t): Ditto.
	(vuint8m1x8_t): Ditto.
	(vint8m2x2_t): Ditto.
	(vuint8m2x2_t): Ditto.
	(vint8m2x3_t): Ditto.
	(vuint8m2x3_t): Ditto.
	(vint8m2x4_t): Ditto.
	(vuint8m2x4_t): Ditto.
	(vint8m4x2_t): Ditto.
	(vuint8m4x2_t): Ditto.
	(vint16mf4x2_t): Ditto.
	(vuint16mf4x2_t): Ditto.
	(vint16mf4x3_t): Ditto.
	(vuint16mf4x3_t): Ditto.
	(vint16mf4x4_t): Ditto.
	(vuint16mf4x4_t): Ditto.
	(vint16mf4x5_t): Ditto.
	(vuint16mf4x5_t): Ditto.
	(vint16mf4x6_t): Ditto.
	(vuint16mf4x6_t): Ditto.
	(vint16mf4x7_t): Ditto.
	(vuint16mf4x7_t): Ditto.
	(vint16mf4x8_t): Ditto.
	(vuint16mf4x8_t): Ditto.
	(vint16mf2x2_t): Ditto.
	(vuint16mf2x2_t): Ditto.
	(vint16mf2x3_t): Ditto.
	(vuint16mf2x3_t): Ditto.
	(vint16mf2x4_t): Ditto.
	(vuint16mf2x4_t): Ditto.
	(vint16mf2x5_t): Ditto.
	(vuint16mf2x5_t): Ditto.
	(vint16mf2x6_t): Ditto.
	(vuint16mf2x6_t): Ditto.
	(vint16mf2x7_t): Ditto.
	(vuint16mf2x7_t): Ditto.
	(vint16mf2x8_t): Ditto.
	(vuint16mf2x8_t): Ditto.
	(vint16m1x2_t): Ditto.
	(vuint16m1x2_t): Ditto.
	(vint16m1x3_t): Ditto.
	(vuint16m1x3_t): Ditto.
	(vint16m1x4_t): Ditto.
	(vuint16m1x4_t): Ditto.
	(vint16m1x5_t): Ditto.
	(vuint16m1x5_t): Ditto.
	(vint16m1x6_t): Ditto.
	(vuint16m1x6_t): Ditto.
	(vint16m1x7_t): Ditto.
	(vuint16m1x7_t): Ditto.
	(vint16m1x8_t): Ditto.
	(vuint16m1x8_t): Ditto.
	(vint16m2x2_t): Ditto.
	(vuint16m2x2_t): Ditto.
	(vint16m2x3_t): Ditto.
	(vuint16m2x3_t): Ditto.
	(vint16m2x4_t): Ditto.
	(vuint16m2x4_t): Ditto.
	(vint16m4x2_t): Ditto.
	(vuint16m4x2_t): Ditto.
	(vint32mf2x2_t): Ditto.
	(vuint32mf2x2_t): Ditto.
	(vint32mf2x3_t): Ditto.
	(vuint32mf2x3_t): Ditto.
	(vint32mf2x4_t): Ditto.
	(vuint32mf2x4_t): Ditto.
	(vint32mf2x5_t): Ditto.
	(vuint32mf2x5_t): Ditto.
	(vint32mf2x6_t): Ditto.
	(vuint32mf2x6_t): Ditto.
	(vint32mf2x7_t): Ditto.
	(vuint32mf2x7_t): Ditto.
	(vint32mf2x8_t): Ditto.
	(vuint32mf2x8_t): Ditto.
	(vint32m1x2_t): Ditto.
	(vuint32m1x2_t): Ditto.
	(vint32m1x3_t): Ditto.
	(vuint32m1x3_t): Ditto.
	(vint32m1x4_t): Ditto.
	(vuint32m1x4_t): Ditto.
	(vint32m1x5_t): Ditto.
	(vuint32m1x5_t): Ditto.
	(vint32m1x6_t): Ditto.
	(vuint32m1x6_t): Ditto.
	(vint32m1x7_t): Ditto.
	(vuint32m1x7_t): Ditto.
	(vint32m1x8_t): Ditto.
	(vuint32m1x8_t): Ditto.
	(vint32m2x2_t): Ditto.
	(vuint32m2x2_t): Ditto.
	(vint32m2x3_t): Ditto.
	(vuint32m2x3_t): Ditto.
	(vint32m2x4_t): Ditto.
	(vuint32m2x4_t): Ditto.
	(vint32m4x2_t): Ditto.
	(vuint32m4x2_t): Ditto.
	(vint64m1x2_t): Ditto.
	(vuint64m1x2_t): Ditto.
	(vint64m1x3_t): Ditto.
	(vuint64m1x3_t): Ditto.
	(vint64m1x4_t): Ditto.
	(vuint64m1x4_t): Ditto.
	(vint64m1x5_t): Ditto.
	(vuint64m1x5_t): Ditto.
	(vint64m1x6_t): Ditto.
	(vuint64m1x6_t): Ditto.
	(vint64m1x7_t): Ditto.
	(vuint64m1x7_t): Ditto.
	(vint64m1x8_t): Ditto.
	(vuint64m1x8_t): Ditto.
	(vint64m2x2_t): Ditto.
	(vuint64m2x2_t): Ditto.
	(vint64m2x3_t): Ditto.
	(vuint64m2x3_t): Ditto.
	(vint64m2x4_t): Ditto.
	(vuint64m2x4_t): Ditto.
	(vint64m4x2_t): Ditto.
	(vuint64m4x2_t): Ditto.
	(vfloat32mf2x2_t): Ditto.
	(vfloat32mf2x3_t): Ditto.
	(vfloat32mf2x4_t): Ditto.
	(vfloat32mf2x5_t): Ditto.
	(vfloat32mf2x6_t): Ditto.
	(vfloat32mf2x7_t): Ditto.
	(vfloat32mf2x8_t): Ditto.
	(vfloat32m1x2_t): Ditto.
	(vfloat32m1x3_t): Ditto.
	(vfloat32m1x4_t): Ditto.
	(vfloat32m1x5_t): Ditto.
	(vfloat32m1x6_t): Ditto.
	(vfloat32m1x7_t): Ditto.
	(vfloat32m1x8_t): Ditto.
	(vfloat32m2x2_t): Ditto.
	(vfloat32m2x3_t): Ditto.
	(vfloat32m2x4_t): Ditto.
	(vfloat32m4x2_t): Ditto.
	(vfloat64m1x2_t): Ditto.
	(vfloat64m1x3_t): Ditto.
	(vfloat64m1x4_t): Ditto.
	(vfloat64m1x5_t): Ditto.
	(vfloat64m1x6_t): Ditto.
	(vfloat64m1x7_t): Ditto.
	(vfloat64m1x8_t): Ditto.
	(vfloat64m2x2_t): Ditto.
	(vfloat64m2x3_t): Ditto.
	(vfloat64m2x4_t): Ditto.
	(vfloat64m4x2_t): Ditto.
	* config/riscv/riscv-vector-builtins.h (DEF_RVV_TUPLE_TYPE):
	Ditto.
	* config/riscv/riscv-vector-switch.def (TUPLE_ENTRY): Ditto.
	* config/riscv/riscv.cc (riscv_v_ext_tuple_mode_p): New
	function.
	(TUPLE_ENTRY): New macro.
	(riscv_v_ext_mode_p): New function.
	(riscv_v_adjust_nunits): Add tuple mode adjustment.
	(riscv_classify_address): Ditto.
	(riscv_binary_cost): Ditto.
	(riscv_rtx_costs): Ditto.
	(riscv_secondary_memory_needed): Ditto.
	(riscv_hard_regno_nregs): Ditto.
	(riscv_hard_regno_mode_ok): Ditto.
	(riscv_vector_mode_supported_p): Ditto.
	(riscv_regmode_natural_size): Ditto.
	(riscv_array_mode): New function.
	(TARGET_ARRAY_MODE): New target hook.
	* config/riscv/riscv.md: Add tuple modes.
	* config/riscv/vector-iterators.md: Ditto.
	* config/riscv/vector.md (mov<mode>): Add tuple modes data
	movement.
	(*mov<VT:mode>_<P:mode>): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/abi-10.c: New test.
	* gcc.target/riscv/rvv/base/abi-11.c: New test.
	* gcc.target/riscv/rvv/base/abi-12.c: New test.
	* gcc.target/riscv/rvv/base/abi-13.c: New test.
	* gcc.target/riscv/rvv/base/abi-14.c: New test.
	* gcc.target/riscv/rvv/base/abi-15.c: New test.
	* gcc.target/riscv/rvv/base/abi-16.c: New test.
	* gcc.target/riscv/rvv/base/abi-8.c: New test.
	* gcc.target/riscv/rvv/base/abi-9.c: New test.
	* gcc.target/riscv/rvv/base/tuple-1.c: New test.
	* gcc.target/riscv/rvv/base/tuple-10.c: New test.
	* gcc.target/riscv/rvv/base/tuple-11.c: New test.
	* gcc.target/riscv/rvv/base/tuple-12.c: New test.
	* gcc.target/riscv/rvv/base/tuple-13.c: New test.
	* gcc.target/riscv/rvv/base/tuple-14.c: New test.
	* gcc.target/riscv/rvv/base/tuple-15.c: New test.
	* gcc.target/riscv/rvv/base/tuple-16.c: New test.
	* gcc.target/riscv/rvv/base/tuple-17.c: New test.
	* gcc.target/riscv/rvv/base/tuple-18.c: New test.
	* gcc.target/riscv/rvv/base/tuple-19.c: New test.
	* gcc.target/riscv/rvv/base/tuple-2.c: New test.
	* gcc.target/riscv/rvv/base/tuple-20.c: New test.
	* gcc.target/riscv/rvv/base/tuple-21.c: New test.
	* gcc.target/riscv/rvv/base/tuple-22.c: New test.
	* gcc.target/riscv/rvv/base/tuple-23.c: New test.
	* gcc.target/riscv/rvv/base/tuple-24.c: New test.
	* gcc.target/riscv/rvv/base/tuple-25.c: New test.
	* gcc.target/riscv/rvv/base/tuple-26.c: New test.
	* gcc.target/riscv/rvv/base/tuple-27.c: New test.
	* gcc.target/riscv/rvv/base/tuple-3.c: New test.
	* gcc.target/riscv/rvv/base/tuple-4.c: New test.
	* gcc.target/riscv/rvv/base/tuple-5.c: New test.
	* gcc.target/riscv/rvv/base/tuple-6.c: New test.
	* gcc.target/riscv/rvv/base/tuple-7.c: New test.
	* gcc.target/riscv/rvv/base/tuple-8.c: New test.
	* gcc.target/riscv/rvv/base/tuple-9.c: New test.
	* gcc.target/riscv/rvv/base/user-10.c: New test.
	* gcc.target/riscv/rvv/base/user-11.c: New test.
	* gcc.target/riscv/rvv/base/user-12.c: New test.
	* gcc.target/riscv/rvv/base/user-13.c: New test.
	* gcc.target/riscv/rvv/base/user-14.c: New test.
	* gcc.target/riscv/rvv/base/user-15.c: New test.
	* gcc.target/riscv/rvv/base/user-7.c: New test.
	* gcc.target/riscv/rvv/base/user-8.c: New test.
	* gcc.target/riscv/rvv/base/user-9.c: New test.

Signed-off-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
  • Loading branch information
zhongjuzhe authored and Liaoshihua committed Mar 12, 2024
1 parent a9cd998 commit 7fbb1bb
Show file tree
Hide file tree
Showing 56 changed files with 6,548 additions and 19 deletions.
133 changes: 133 additions & 0 deletions gcc/config/riscv/riscv-modes.def
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,139 @@ VECTOR_MODE_WITH_PREFIX (VNx, INT, QI, 1, 0);
ADJUST_NUNITS (VNx1QI, riscv_v_adjust_nunits (VNx1QImode, 1));
ADJUST_ALIGNMENT (VNx1QI, 1);

/* Tuple modes for segment loads/stores, parameterized by NF.  NF ranges from 2 to 8.  */

/*
| Mode | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 | MIN_VLEN=128 | MIN_VLEN=128 |
| | LMUL | SEW/LMUL | LMUL | SEW/LMUL | LMUL | SEW/LMUL |
| VNxNFx1QI | MF4 | 32 | MF8 | 64 | N/A | N/A |
| VNxNFx2QI | MF2 | 16 | MF4 | 32 | MF8 | 64 |
| VNxNFx4QI | M1 | 8 | MF2 | 16 | MF4 | 32 |
| VNxNFx8QI | M2 | 4 | M1 | 8 | MF2 | 16 |
| VNxNFx16QI | M4 | 2 | M2 | 4 | M1 | 8 |
| VNxNFx32QI | M8 | 1 | M4 | 2 | M2 | 4 |
| VNxNFx64QI | N/A | N/A | M8 | 1 | M4 | 2 |
| VNxNFx128QI | N/A | N/A | N/A | N/A | M8 | 1 |
| VNxNFx1(HI|HF) | MF2 | 32 | MF4 | 64 | N/A | N/A |
| VNxNFx2(HI|HF) | M1 | 16 | MF2 | 32 | MF4 | 64 |
| VNxNFx4(HI|HF) | M2 | 8 | M1 | 16 | MF2 | 32 |
| VNxNFx8(HI|HF) | M4 | 4 | M2 | 8 | M1 | 16 |
| VNxNFx16(HI|HF)| M8 | 2 | M4 | 4 | M2 | 8 |
| VNxNFx32(HI|HF)| N/A | N/A | M8 | 2 | M4 | 4 |
| VNxNFx64(HI|HF)| N/A | N/A | N/A | N/A | M8 | 2 |
| VNxNFx1(SI|SF) | M1 | 32 | MF2 | 64 | MF2 | 64 |
| VNxNFx2(SI|SF) | M2 | 16 | M1 | 32 | M1 | 32 |
| VNxNFx4(SI|SF) | M4 | 8 | M2 | 16 | M2 | 16 |
| VNxNFx8(SI|SF) | M8 | 4 | M4 | 8 | M4 | 8 |
| VNxNFx16(SI|SF)| N/A | N/A | M8 | 4 | M8 | 4 |
| VNxNFx1(DI|DF) | N/A | N/A | M1 | 64 | N/A | N/A |
| VNxNFx2(DI|DF) | N/A | N/A | M2 | 32 | M1 | 64 |
| VNxNFx4(DI|DF) | N/A | N/A | M4 | 16 | M2 | 32 |
| VNxNFx8(DI|DF) | N/A | N/A | M8 | 8 | M4 | 16 |
| VNxNFx16(DI|DF)| N/A | N/A | N/A | N/A | M8 | 8 |
*/

/* Declare the tuple modes made of NSUBPARTS subparts for one subpart size.

   NBYTES is the unit count handed to VECTOR_MODE_WITH_PREFIX for the QI
   variant; the HI, SI/SF and DI/DF variants use NBYTES / 2, NBYTES / 4 and
   NBYTES / 8 respectively.  VB, VH, VS and VD are the per-subpart unit
   counts for byte, half, single and double elements: they are pasted into
   the mode names (VNx<NSUBPARTS>x<V?><elt>) and multiplied by NSUBPARTS to
   form the value passed to riscv_v_adjust_nunits.  Every invocation below
   passes VB == NBYTES, VH == NBYTES / 2, VS == NBYTES / 4 and
   VD == NBYTES / 8.

   Each mode's alignment is adjusted to the size of its element type
   (1, 2, 4 or 8).  */
#define RVV_TUPLE_MODES(NBYTES, NSUBPARTS, VB, VH, VS, VD) \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, QI, NBYTES, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, HI, NBYTES / 2, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, SI, NBYTES / 4, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, FLOAT, SF, NBYTES / 4, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, DI, NBYTES / 8, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, FLOAT, DF, NBYTES / 8, 1); \
ADJUST_NUNITS (VNx##NSUBPARTS##x##VB##QI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VB##QI##mode, \
VB * NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x##VH##HI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VH##HI##mode, \
VH * NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x##VS##SI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VS##SI##mode, \
VS * NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x##VD##DI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VD##DI##mode, \
VD * NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x##VS##SF, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VS##SF##mode, \
VS * NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x##VD##DF, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VD##DF##mode, \
VD * NSUBPARTS)); \
\
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VB##QI, 1); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VH##HI, 2); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VS##SI, 4); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VD##DI, 8); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VS##SF, 4); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VD##DF, 8);

/* NBYTES = 8: NSUBPARTS 2 through 8.  */
RVV_TUPLE_MODES (8, 2, 8, 4, 2, 1)
RVV_TUPLE_MODES (8, 3, 8, 4, 2, 1)
RVV_TUPLE_MODES (8, 4, 8, 4, 2, 1)
RVV_TUPLE_MODES (8, 5, 8, 4, 2, 1)
RVV_TUPLE_MODES (8, 6, 8, 4, 2, 1)
RVV_TUPLE_MODES (8, 7, 8, 4, 2, 1)
RVV_TUPLE_MODES (8, 8, 8, 4, 2, 1)

/* NBYTES = 16: NSUBPARTS 2 through 8.  */
RVV_TUPLE_MODES (16, 2, 16, 8, 4, 2)
RVV_TUPLE_MODES (16, 3, 16, 8, 4, 2)
RVV_TUPLE_MODES (16, 4, 16, 8, 4, 2)
RVV_TUPLE_MODES (16, 5, 16, 8, 4, 2)
RVV_TUPLE_MODES (16, 6, 16, 8, 4, 2)
RVV_TUPLE_MODES (16, 7, 16, 8, 4, 2)
RVV_TUPLE_MODES (16, 8, 16, 8, 4, 2)

/* NBYTES = 32: only NSUBPARTS 2 through 4 are declared.  */
RVV_TUPLE_MODES (32, 2, 32, 16, 8, 4)
RVV_TUPLE_MODES (32, 3, 32, 16, 8, 4)
RVV_TUPLE_MODES (32, 4, 32, 16, 8, 4)

/* NBYTES = 64: only NSUBPARTS 2 is declared.  */
RVV_TUPLE_MODES (64, 2, 64, 32, 16, 8)

/* Declare the small tuple modes that RVV_TUPLE_MODES does not produce for
   a tuple of NSUBPARTS subparts: subparts of 1, 2 and 4 QI units, 1 and 2
   HI units, and 1 SI/SF unit.  As in RVV_TUPLE_MODES, the value passed to
   riscv_v_adjust_nunits is the per-subpart unit count times NSUBPARTS and
   the alignment is adjusted to the element size.  No DI/DF variants are
   declared here.  */
#define RVV_TUPLE_PARTIAL_MODES(NSUBPARTS) \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, QI, 1, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, HI, 1, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, SI, 1, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, FLOAT, SF, 1, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, QI, 2, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, HI, 2, 1); \
VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, QI, 4, 1); \
\
ADJUST_NUNITS (VNx##NSUBPARTS##x1QI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x1QI##mode, \
NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x1HI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x1HI##mode, \
NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x1SI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x1SI##mode, \
NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x1SF, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x1SF##mode, \
NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x2QI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x2QI##mode, \
2 * NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x2HI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x2HI##mode, \
2 * NSUBPARTS)); \
ADJUST_NUNITS (VNx##NSUBPARTS##x4QI, \
riscv_v_adjust_nunits (VNx##NSUBPARTS##x4QI##mode, \
4 * NSUBPARTS)); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1QI, 1); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1HI, 2); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1SI, 4); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1SF, 4); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x2QI, 1); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x2HI, 2); \
ADJUST_ALIGNMENT (VNx##NSUBPARTS##x4QI, 1);

/* One instantiation per supported NSUBPARTS value, 2 through 8.  */
RVV_TUPLE_PARTIAL_MODES (2)
RVV_TUPLE_PARTIAL_MODES (3)
RVV_TUPLE_PARTIAL_MODES (4)
RVV_TUPLE_PARTIAL_MODES (5)
RVV_TUPLE_PARTIAL_MODES (6)
RVV_TUPLE_PARTIAL_MODES (7)
RVV_TUPLE_PARTIAL_MODES (8)

/* TODO: According to RISC-V 'V' ISA spec, the maximum vector length can
be 65536 for a single vector register which means the vector mode in
GCC can be maximum = 65536 * 8 bits (LMUL=8).
Expand Down
5 changes: 5 additions & 0 deletions gcc/config/riscv/riscv-protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ extern bool riscv_gpr_save_operation_p (rtx);
extern void riscv_reinit (void);
extern poly_uint64 riscv_regmode_natural_size (machine_mode);
extern bool riscv_v_ext_vector_mode_p (machine_mode);
/* Predicate for RVV tuple modes.  riscv_vector::get_nf and
   riscv_vector::get_subpart_mode assert it on their argument.
   NOTE(review): the definition is not in this header; per the ChangeLog it
   lives in riscv.cc -- confirm the exact set of modes accepted there.  */
extern bool riscv_v_ext_tuple_mode_p (machine_mode);
extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT);
extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
Expand Down Expand Up @@ -170,6 +171,8 @@ void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
enum vlmul_type get_vlmul (machine_mode);
unsigned int get_ratio (machine_mode);
/* Return the NF value recorded for a tuple mode.  Asserts that the
   argument is a tuple mode.  */
unsigned int get_nf (machine_mode);
/* Return the subpart mode recorded for a tuple mode (e.g. VNx1SImode for
   VNx2x1SImode).  Asserts that the argument is a tuple mode.  */
machine_mode get_subpart_mode (machine_mode);
int get_ta (rtx);
int get_ma (rtx);
int get_avl_type (rtx);
Expand All @@ -191,6 +194,7 @@ enum tail_policy get_prefer_tail_policy ();
enum mask_policy get_prefer_mask_policy ();
rtx get_avl_type_rtx (enum avl_type);
opt_machine_mode get_vector_mode (scalar_mode, poly_uint64);
/* Look up the tuple mode built from the given subpart mode repeated NF
   (second argument) times.  Returns an empty opt_machine_mode when no such
   tuple mode exists.  */
opt_machine_mode get_tuple_mode (machine_mode, unsigned int);
bool simm5_p (rtx);
bool neg_simm5_p (rtx);
#ifdef RTX_CODE
Expand All @@ -212,6 +216,7 @@ enum vlen_enum
bool slide1_sew64_helper (int, machine_mode, machine_mode,
machine_mode, rtx *);
rtx gen_avl_for_scalar_move (rtx);
/* Expand a tuple-mode move into per-subpart moves.  The first argument is
   the mask mode handed to emit_vlmax_op for fractional subparts; the second
   is the operand array (operands 0 and 1 are destination and source,
   operands 2-4 are used as scratch operands by the memory cases).  */
void expand_tuple_move (machine_mode, rtx *);
}

/* We classify builtin types into two classes:
Expand Down
188 changes: 185 additions & 3 deletions gcc/config/riscv/riscv-v.cc
Original file line number Diff line number Diff line change
Expand Up @@ -342,17 +342,32 @@ struct mode_vtype_group
uint8_t ratio_for_min_vlen64[NUM_MACHINE_MODES];
enum vlmul_type vlmul_for_for_vlen128[NUM_MACHINE_MODES];
uint8_t ratio_for_for_vlen128[NUM_MACHINE_MODES];
machine_mode subpart_mode[NUM_MACHINE_MODES];
uint8_t nf[NUM_MACHINE_MODES];
mode_vtype_group ()
{
#define ENTRY(MODE, REQUIREMENT, VLMUL_FOR_MIN_VLEN32, RATIO_FOR_MIN_VLEN32, \
VLMUL_FOR_MIN_VLEN64, RATIO_FOR_MIN_VLEN64, \
VLMUL_FOR_FOR_VLEN128, RATIO_FOR_FOR_VLEN128) \
VLMUL_FOR_MIN_VLEN128, RATIO_FOR_MIN_VLEN128) \
vlmul_for_min_vlen32[MODE##mode] = VLMUL_FOR_MIN_VLEN32; \
ratio_for_min_vlen32[MODE##mode] = RATIO_FOR_MIN_VLEN32; \
vlmul_for_min_vlen64[MODE##mode] = VLMUL_FOR_MIN_VLEN64; \
ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; \
vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_FOR_VLEN128; \
ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_FOR_VLEN128;
vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128; \
ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
#include "riscv-vector-switch.def"
#define TUPLE_ENTRY(MODE, REQUIREMENT, SUBPART_MODE, NF, VLMUL_FOR_MIN_VLEN32, \
RATIO_FOR_MIN_VLEN32, VLMUL_FOR_MIN_VLEN64, \
RATIO_FOR_MIN_VLEN64, VLMUL_FOR_MIN_VLEN128, \
RATIO_FOR_MIN_VLEN128) \
subpart_mode[MODE##mode] = SUBPART_MODE##mode; \
nf[MODE##mode] = NF; \
vlmul_for_min_vlen32[MODE##mode] = VLMUL_FOR_MIN_VLEN32; \
ratio_for_min_vlen32[MODE##mode] = RATIO_FOR_MIN_VLEN32; \
vlmul_for_min_vlen64[MODE##mode] = VLMUL_FOR_MIN_VLEN64; \
ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; \
vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128; \
ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
#include "riscv-vector-switch.def"
}
};
Expand All @@ -371,6 +386,26 @@ get_vlmul (machine_mode mode)
return mode_vtype_infos.vlmul_for_min_vlen64[mode];
}

/* Look up the NF value recorded for tuple mode MODE.  */
unsigned int
get_nf (machine_mode mode)
{
  /* Only tuple modes carry an NF entry; any other mode is a caller bug.  */
  gcc_assert (riscv_v_ext_tuple_mode_p (mode));

  unsigned int nf_value = mode_vtype_infos.nf[mode];
  return nf_value;
}

/* Look up the mode of a single subpart of tuple mode MODE; e.g. the
   subpart mode of VNx2x1SImode is VNx1SImode.  The builtins code uses
   this to build the array/struct type of a tuple.  */
machine_mode
get_subpart_mode (machine_mode mode)
{
  /* Only tuple modes have a subpart entry; any other mode is a caller
     bug.  */
  gcc_assert (riscv_v_ext_tuple_mode_p (mode));

  machine_mode part_mode = mode_vtype_infos.subpart_mode[mode];
  return part_mode;
}

/* Get ratio according to machine mode. */
unsigned int
get_ratio (machine_mode mode)
Expand Down Expand Up @@ -452,6 +487,24 @@ get_vector_mode (scalar_mode inner_mode, poly_uint64 nunits)
return opt_machine_mode ();
}

/* Search for the RVV tuple mode whose subparts have mode SUBPART_MODE and
   which holds NF of them.  Return that mode, or an empty opt_machine_mode
   when no legal tuple mode matches.  */
opt_machine_mode
get_tuple_mode (machine_mode subpart_mode, unsigned int nf)
{
  enum mode_class mclass = GET_MODE_CLASS (subpart_mode);
  scalar_mode elt_mode = GET_MODE_INNER (subpart_mode);
  /* A tuple holds NF subparts, so it has NF times as many units.  */
  poly_uint64 total_units = GET_MODE_NUNITS (subpart_mode) * nf;
  machine_mode candidate;

  FOR_EACH_MODE_IN_CLASS (candidate, mclass)
    {
      bool same_shape_p
        = (elt_mode == GET_MODE_INNER (candidate)
           && known_eq (total_units, GET_MODE_NUNITS (candidate)));

      /* get_subpart_mode asserts on non-tuple modes, so the tuple check
         has to come first.  */
      if (same_shape_p
          && riscv_v_ext_tuple_mode_p (candidate)
          && get_subpart_mode (candidate) == subpart_mode)
        return candidate;
    }

  return opt_machine_mode ();
}

bool
simm5_p (rtx x)
{
Expand Down Expand Up @@ -742,4 +795,133 @@ gen_avl_for_scalar_move (rtx avl)
}
}

/* Expand a data movement between tuple-mode operands into one move per
   subpart.

   MASK_MODE is passed to emit_vlmax_op when a subpart is moved with a
   predicated move (fractional subparts only).

   OPS[0] is the destination; its mode is taken as the tuple mode.
   OPS[1] is the source and is accessed in that same tuple mode.
   OPS[2] is written with the byte stride between consecutive subparts
   when the subpart size is not a compile-time constant.
   OPS[3] is written with the running memory address of the current
   subpart.
   OPS[4] is forwarded to emit_vlmax_op.
   NOTE(review): OPS[2]..OPS[4] look like scratch operands supplied by the
   move pattern in vector.md -- confirm against that pattern.

   Three cases are handled:
   - register <- constant vector: the constant must be a duplicate of one
     element, and each subpart is set to the same duplicate;
   - register <- register: subparts are copied in an order that is safe
     when the two register groups overlap;
   - anything else: one of the operands must be memory, and the subparts
     are loaded or stored one after another at consecutive addresses.  */
void
expand_tuple_move (machine_mode mask_mode, rtx *ops)
{
  unsigned int i;
  machine_mode tuple_mode = GET_MODE (ops[0]);
  machine_mode subpart_mode = get_subpart_mode (tuple_mode);
  poly_int64 subpart_size = GET_MODE_SIZE (subpart_mode);
  unsigned int nf = get_nf (tuple_mode);
  /* True when one subpart is smaller than BYTES_PER_RISCV_VECTOR.  */
  bool fractional_p = known_lt (subpart_size, BYTES_PER_RISCV_VECTOR);

  if (REG_P (ops[0]) && CONST_VECTOR_P (ops[1]))
    {
      /* Query the duplicated element outside of the assertion: VAL is an
         output of const_vec_duplicate_p, and gcc_assert does not evaluate
         its operand in every build configuration, which would leave VAL
         uninitialized below.  */
      rtx val = NULL_RTX;
      bool duplicate_p = const_vec_duplicate_p (ops[1], &val);
      gcc_assert (can_create_pseudo_p () && duplicate_p);
      for (i = 0; i < nf; ++i)
        {
          poly_int64 offset = i * subpart_size;
          rtx subreg
            = simplify_gen_subreg (subpart_mode, ops[0], tuple_mode, offset);
          rtx dup = gen_const_vec_duplicate (subpart_mode, val);
          emit_move_insn (subreg, dup);
        }
    }
  else if (REG_P (ops[0]) && REG_P (ops[1]))
    {
      for (i = 0; i < nf; ++i)
        {
          int index = i;

          /* Take NF = 2 and LMUL = 1 for example:
             - move v8 to v9:
                 vmv1r v10,v9
                 vmv1r v9,v8
             - move v8 to v7:
                 vmv1r v7,v8
                 vmv1r v8,v9
             I.e. when the destination has the higher register number,
             copy the last subpart first so that no source subpart is
             overwritten before it has been read.  */
          if (REGNO (ops[0]) > REGNO (ops[1]))
            index = nf - 1 - i;
          poly_int64 offset = index * subpart_size;
          rtx dst_subreg
            = simplify_gen_subreg (subpart_mode, ops[0], tuple_mode, offset);
          rtx src_subreg
            = simplify_gen_subreg (subpart_mode, ops[1], tuple_mode, offset);
          emit_insn (gen_rtx_SET (dst_subreg, src_subreg));
        }
    }
  else
    {
      /* Expand tuple memory data movement.  */
      gcc_assert (MEM_P (ops[0]) || MEM_P (ops[1]));

      /* OFFSET is the byte distance between consecutive subparts.  It is
         an immediate when the subpart size is a constant; otherwise it is
         computed into OPS[2] from BYTES_PER_RISCV_VECTOR.  */
      rtx offset = gen_int_mode (subpart_size, Pmode);
      if (!subpart_size.is_constant ())
        {
          emit_move_insn (ops[2], gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode));
          if (fractional_p)
            {
              /* Subpart is a fraction of BYTES_PER_RISCV_VECTOR: divide by
                 the (power-of-two) factor with a right shift.  */
              unsigned int factor
                = exact_div (BYTES_PER_RISCV_VECTOR, subpart_size)
                    .to_constant ();
              rtx pat
                = gen_rtx_ASHIFTRT (Pmode, ops[2],
                                    gen_int_mode (exact_log2 (factor), Pmode));
              emit_insn (gen_rtx_SET (ops[2], pat));
            }

          if (known_gt (subpart_size, BYTES_PER_RISCV_VECTOR))
            {
              /* Subpart is a multiple of BYTES_PER_RISCV_VECTOR: multiply
                 by the (power-of-two) factor with a left shift.  */
              unsigned int factor
                = exact_div (subpart_size, BYTES_PER_RISCV_VECTOR)
                    .to_constant ();
              rtx pat
                = gen_rtx_ASHIFT (Pmode, ops[2],
                                  gen_int_mode (exact_log2 (factor), Pmode));
              emit_insn (gen_rtx_SET (ops[2], pat));
            }
          offset = ops[2];
        }

      if (MEM_P (ops[1]))
        {
          /* Load operations.  OPS[3] starts at the source address and is
             advanced by OFFSET before every subpart except the first.  */
          emit_move_insn (ops[3], XEXP (ops[1], 0));
          for (i = 0; i < nf; i++)
            {
              rtx subreg = simplify_gen_subreg (subpart_mode, ops[0],
                                                tuple_mode, i * subpart_size);
              if (i != 0)
                {
                  rtx new_addr = gen_rtx_PLUS (Pmode, ops[3], offset);
                  emit_insn (gen_rtx_SET (ops[3], new_addr));
                }
              rtx mem = gen_rtx_MEM (subpart_mode, ops[3]);

              /* Fractional subparts go through the predicated move;
                 everything else uses an ordinary move.  */
              if (fractional_p)
                emit_vlmax_op (code_for_pred_mov (subpart_mode), subreg, mem,
                               ops[4], mask_mode);
              else
                emit_move_insn (subreg, mem);
            }
        }
      else
        {
          /* Store operations.  OPS[3] starts at the destination address
             and is advanced by OFFSET before every subpart except the
             first.  */
          emit_move_insn (ops[3], XEXP (ops[0], 0));
          for (i = 0; i < nf; i++)
            {
              rtx subreg = simplify_gen_subreg (subpart_mode, ops[1],
                                                tuple_mode, i * subpart_size);
              if (i != 0)
                {
                  rtx new_addr = gen_rtx_PLUS (Pmode, ops[3], offset);
                  emit_insn (gen_rtx_SET (ops[3], new_addr));
                }
              rtx mem = gen_rtx_MEM (subpart_mode, ops[3]);

              /* Fractional subparts go through the predicated move;
                 everything else uses an ordinary move.  */
              if (fractional_p)
                emit_vlmax_op (code_for_pred_mov (subpart_mode), mem, subreg,
                               ops[4], mask_mode);
              else
                emit_move_insn (mem, subreg);
            }
        }
    }
}

} // namespace riscv_vector
Loading

0 comments on commit 7fbb1bb

Please sign in to comment.