riscv64: Add vadd.vx
afonso360 committed May 16, 2023
1 parent 84621b1 commit 6a5e40e
Showing 5 changed files with 217 additions and 12 deletions.
6 changes: 4 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
@@ -250,7 +250,7 @@ impl VecAluOpRRR {
pub fn funct6(&self) -> u32 {
// See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
match self {
- VecAluOpRRR::VaddVV => 0b000000,
+ VecAluOpRRR::VaddVV | VecAluOpRRR::VaddVX => 0b000000,
VecAluOpRRR::VsubVV | VecAluOpRRR::VsubVX => 0b000010,
VecAluOpRRR::VrsubVX => 0b000011,
VecAluOpRRR::VmulVV => 0b100101,
@@ -272,7 +272,9 @@ impl VecAluOpRRR {
VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => {
VecOpCategory::OPMVV
}
- VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX => VecOpCategory::OPIVX,
+ VecAluOpRRR::VaddVX | VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX => {
+ VecOpCategory::OPIVX
+ }
}
}
}
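The two hunks above are all the encoding information `vadd.vx` needs: funct6 is 0b000000 (shared with `vadd.vv`) and the opcode category is OPIVX. As a minimal standalone sketch (not Cranelift's actual emission code), the 32-bit instruction word can be assembled from those fields using the riscv-v-spec layout funct6 | vm | vs2 | rs1 | funct3 | vd | opcode; the OPIVX funct3 value (0b100) and the OP-V major opcode (0b1010111) are taken from that spec, and the result matches the `vadd.vx v5,v1,a0` word that shows up in the filetest disassembly further down.

// Illustrative sketch only, not backend code: assemble a `vadd.vx` instruction word.
fn encode_vadd_vx(vd: u32, vs2: u32, rs1: u32) -> u32 {
    let funct6 = 0b000000u32;  // VaddVX shares funct6 0b000000 with VaddVV (see above)
    let vm = 1u32;             // unmasked
    let funct3 = 0b100u32;     // OPIVX: vector-scalar operation with an x-register operand
    let opcode = 0b1010111u32; // OP-V major opcode
    (funct6 << 26) | (vm << 25) | (vs2 << 20) | (rs1 << 15) | (funct3 << 12) | (vd << 7) | opcode
}

fn main() {
    // `vadd.vx v5,v1,a0` appears below as `.byte 0xd7, 0x42, 0x15, 0x02`,
    // i.e. 0x021542d7 when read little-endian.
    assert_eq!(encode_vadd_vx(5, 1, 10), 0x021542d7);
}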
6 changes: 6 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
@@ -94,6 +94,7 @@
(VxorVV)

;; Vector-Scalar Opcodes
+ (VaddVX)
(VsubVX)
(VrsubVX)
))
@@ -189,6 +190,11 @@
(rule (rv_vadd_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 vstate))

+ ;; Helper for emitting the `vadd.vx` instruction.
+ (decl rv_vadd_vx (Reg Reg VState) Reg)
+ (rule (rv_vadd_vx vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VaddVX) vs2 vs1 vstate))

;; Helper for emitting the `vadd.vi` instruction.
(decl rv_vadd_vi (Reg Imm5 VState) Reg)
(rule (rv_vadd_vi vs2 imm vstate)
10 changes: 8 additions & 2 deletions cranelift/codegen/src/isa/riscv64/lower.isle
@@ -110,10 +110,16 @@
(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y)))
(rv_vadd_vv x y ty))

- (rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y))))
+ (rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat y))))
+ (rv_vadd_vx x y ty))

+ (rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat x) y)))
+ (rv_vadd_vx y x ty))

+ (rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y))))
(rv_vadd_vi x y ty))

- (rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y)))
+ (rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y)))
(rv_vadd_vi y x ty))

;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;
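The rule numbers above are ISLE priorities, and higher priorities are tried first: an `iadd` against a `replicated_imm5` still lowers to `vadd.vi` (rules 11 and 12), an `iadd` against a `splat` of a scalar lowers to the new `vadd.vx` (rules 9 and 10, with the operands commuted when the splat is on the left), and anything else falls back to `vadd.vv` (rule 8). A rough model of that selection order (a sketch only; the enum and function below are illustrative, not Cranelift APIs):

// Illustrative only: models which form the iadd rules above would pick.
#[derive(Debug, PartialEq)]
enum VaddForm {
    Vi, // one operand is a replicated 5-bit immediate -> vadd.vi (rules 11/12)
    Vx, // one operand is a splatted scalar register   -> vadd.vx (rules 9/10)
    Vv, // plain vector-vector add                     -> vadd.vv (rule 8)
}

fn pick_form(splat_imm5: bool, splat_reg: bool) -> VaddForm {
    // Higher-priority rules match first, so the immediate form wins over the
    // register form, which in turn wins over the generic vector-vector form.
    if splat_imm5 {
        VaddForm::Vi
    } else if splat_reg {
        VaddForm::Vx
    } else {
        VaddForm::Vv
    }
}

fn main() {
    assert_eq!(pick_form(true, true), VaddForm::Vi);   // splat of an imm5 constant
    assert_eq!(pick_form(false, true), VaddForm::Vx);  // splat of a register value
    assert_eq!(pick_form(false, false), VaddForm::Vv); // no splat on either side
}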
158 changes: 158 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif
@@ -331,3 +331,161 @@ block0(v0: i64x2):
; addi sp, sp, 0x10
; ret

function %iadd_splat_i8x16(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = splat.i8x16 v1
v3 = iadd v0, v2
return v3
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma)
; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0xd7, 0x42, 0x15, 0x02
; .byte 0xa7, 0x82, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iadd_splat_i16x8(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = splat.i16x8 v1
v3 = iadd v0, v2
return v3
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma)
; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0xd7, 0x42, 0x15, 0x02
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0xa7, 0x82, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iadd_splat_i32x4(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = splat.i32x4 v1
v3 = iadd v0, v2
return v3
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0xd7, 0x42, 0x15, 0x02
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0xa7, 0x82, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iadd_splat_i64x2(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = splat.i64x2 v1
v3 = iadd v2, v0
return v3
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma)
; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0xd7, 0x42, 0x15, 0x02
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0xa7, 0x82, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

49 changes: 41 additions & 8 deletions cranelift/filetests/filetests/runtests/simd-iadd-splat.clif
@@ -8,38 +8,71 @@ target x86_64
target x86_64 skylake
target riscv64 has_v

- function %iadd_splat_i8x16(i8x16) -> i8x16 {
+ function %iadd_splat_const_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i8 5
v2 = splat.i8x16 v1
v3 = iadd v0, v2
return v3
}
- ; run: %iadd_splat_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21]
+ ; run: %iadd_splat_const_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21]

- function %iadd_splat_i16x8(i16x8) -> i16x8 {
+ function %iadd_splat_const_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i16 -16
v2 = splat.i16x8 v1
v3 = iadd v0, v2
return v3
}
- ; run: %iadd_splat_i16x8([1 2 3 4 5 6 7 8]) == [-15 -14 -13 -12 -11 -10 -9 -8]
+ ; run: %iadd_splat_const_i16x8([1 2 3 4 5 6 7 8]) == [-15 -14 -13 -12 -11 -10 -9 -8]

- function %iadd_splat_i32x4(i32x4) -> i32x4 {
+ function %iadd_splat_const_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 15
v2 = splat.i32x4 v1
v3 = iadd v0, v2
return v3
}
- ; run: %iadd_splat_i32x4([1 2 3 4]) == [16 17 18 19]
+ ; run: %iadd_splat_const_i32x4([1 2 3 4]) == [16 17 18 19]

- function %iadd_splat_i64x2(i64x2) -> i64x2 {
+ function %iadd_splat_const_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i64 -5
v2 = splat.i64x2 v1
v3 = iadd v2, v0
return v3
}
- ; run: %iadd_splat_i64x2([1 2]) == [-4 -3]
+ ; run: %iadd_splat_const_i64x2([1 2]) == [-4 -3]


function %iadd_splat_i8x16(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = splat.i8x16 v1
v3 = iadd v0, v2
return v3
}
; run: %iadd_splat_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], -15) == [-14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1]

function %iadd_splat_i16x8(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = splat.i16x8 v1
v3 = iadd v0, v2
return v3
}
; run: %iadd_splat_i16x8([1 2 3 4 5 6 7 8], -10) == [-9 -8 -7 -6 -5 -4 -3 -2]

function %iadd_splat_i32x4(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = splat.i32x4 v1
v3 = iadd v0, v2
return v3
}
; run: %iadd_splat_i32x4([1 2 3 4], 22) == [23 24 25 26]

function %iadd_splat_i64x2(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = splat.i64x2 v1
v3 = iadd v2, v0
return v3
}
; run: %iadd_splat_i64x2([1 2], 10) == [11 12]
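The new scalar-operand runtests above pin the expected results to a lane-wise wrapping add of the x-register value into every lane. A small model of that behaviour for the e8 case, checked against the first run line of the new tests (a sketch only; `vadd_vx_i8` is an illustrative name, not code from the repository):

// Lane-wise wrapping add of a scalar into every i8 lane, mirroring what the
// `vadd.vx` lowering computes for e8 elements.
fn vadd_vx_i8(vs2: [i8; 16], rs1: i8) -> [i8; 16] {
    let mut out = [0i8; 16];
    for (o, &v) in out.iter_mut().zip(vs2.iter()) {
        *o = v.wrapping_add(rs1);
    }
    out
}

fn main() {
    // Matches: %iadd_splat_i8x16([1 2 .. 16], -15) == [-14 -13 .. 0 1]
    let input: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let expected: [i8; 16] = [-14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1];
    assert_eq!(vadd_vx_i8(input, -15), expected);
}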
