x64: Implement rotl/rotr for some small type combinations
afonso360 committed Aug 11, 2022
1 parent 66e245d commit 32cda80
Showing 5 changed files with 234 additions and 272 deletions.
48 changes: 10 additions & 38 deletions cranelift/codegen/src/isa/x64/lower.isle
@@ -818,31 +818,17 @@
 
 ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; `i16` and `i8`: we need to extend the shift amount, or mask the
-;; constant.
+;; `i64` and smaller: we can rely on x86's rotate-amount masking since
+;; we operate on the whole register. For const's we mask the constant.
 
-(rule (lower (has_type (ty_8_or_16 ty) (rotl src amt)))
-      (let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero))))
-        (x64_rotl ty src (gpr_to_imm8_gpr amt_))))
+(rule (lower (has_type (fits_in_64 ty) (rotl src amt)))
+      (x64_rotl ty src (put_masked_in_imm8_gpr amt ty)))
 
-(rule (lower (has_type (ty_8_or_16 ty)
+(rule (lower (has_type (fits_in_64 ty)
                        (rotl src (u64_from_iconst amt))))
       (x64_rotl ty src
                 (const_to_type_masked_imm8 amt ty)))
 
-;; `i64` and `i32`: we can rely on x86's rotate-amount masking since
-;; we operate on the whole register.
-
-(rule (lower (has_type (ty_32_or_64 ty) (rotl src amt)))
-      ;; NB: Only the low bits of `amt` matter since we logically mask the
-      ;; shift amount to the value's bit width.
-      (let ((amt_ Gpr (lo_gpr amt)))
-        (x64_rotl ty src amt_)))
-
-(rule (lower (has_type (ty_32_or_64 ty)
-                       (rotl src (u64_from_iconst amt))))
-      (x64_rotl ty src
-                (const_to_type_masked_imm8 amt ty)))
 
 ;; `i128`.
 
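An illustrative aside, not part of the commit: the new `fits_in_64` rules are sound because rotation of an N-bit value is periodic with period N, so reducing the rotate amount mod N (which is what x86's rotate-amount masking effectively achieves for rotates) leaves the result unchanged. A minimal Rust sketch of that property, checked against run lines from rotl.clif below:

// Sketch only: models the masking argument in the comment above,
// not Cranelift's actual lowering code.
fn rotl8(x: u8, amt: u64) -> u8 {
    // Rotate built from shifts, amount reduced mod the 8-bit width.
    let n = (amt % 8) as u32;
    if n == 0 { x } else { (x << n) | (x >> (8 - n)) }
}

fn main() {
    // Mirrors run lines in rotl.clif: 65 and 257 both act as rotate-by-1.
    assert_eq!(rotl8(0xe4, 1), 0xc9);
    assert_eq!(rotl8(0xe4, 65), 0xc9);
    assert_eq!(rotl8(0xe4, 257), 0xc9);
    assert_eq!(rotl8(0xe4, 64), 0xe4); // 64 % 8 == 0: identity
}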
@@ -858,31 +844,17 @@
 
 ;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; `i16` and `i8`: we need to extend the shift amount, or mask the
-;; constant.
+;; `i64` and smaller: we can rely on x86's rotate-amount masking since
+;; we operate on the whole register. For const's we mask the constant.
 
-(rule (lower (has_type (ty_8_or_16 ty) (rotr src amt)))
-      (let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero))))
-        (x64_rotr ty src amt_)))
+(rule (lower (has_type (fits_in_64 ty) (rotr src amt)))
+      (x64_rotr ty src (put_masked_in_imm8_gpr amt ty)))
 
-(rule (lower (has_type (ty_8_or_16 ty)
+(rule (lower (has_type (fits_in_64 ty)
                        (rotr src (u64_from_iconst amt))))
       (x64_rotr ty src
                 (const_to_type_masked_imm8 amt ty)))
 
-;; `i64` and `i32`: we can rely on x86's rotate-amount masking since
-;; we operate on the whole register.
-
-(rule (lower (has_type (ty_32_or_64 ty) (rotr src amt)))
-      ;; NB: Only the low bits of `amt` matter since we logically mask the
-      ;; shift amount to the value's bit width.
-      (let ((amt_ Gpr (lo_gpr amt)))
-        (x64_rotr ty src amt_)))
-
-(rule (lower (has_type (ty_32_or_64 ty)
-                       (rotr src (u64_from_iconst amt))))
-      (x64_rotr ty src
-                (const_to_type_masked_imm8 amt ty)))
 
 ;; `i128`.
 
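The `rotr` rules mirror `rotl`; the same mod-width argument carries over through the complement identity between the two rotations. A second small Rust check, again illustrative only and not part of the commit:

fn main() {
    for x in [0xe0u8, 0xef, 0xe4] {
        for amt in 0..32u32 {
            // rotr by amt equals rotl by the complementary amount mod 8,
            // so the masking argument used for rotl applies unchanged.
            // (Rust's rotate_right/rotate_left also reduce mod the width.)
            let n = amt % 8;
            assert_eq!(x.rotate_right(amt), x.rotate_left((8 - n) % 8));
        }
    }
}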
117 changes: 0 additions & 117 deletions cranelift/filetests/filetests/runtests/rotl-small-types.clif

This file was deleted.

112 changes: 112 additions & 0 deletions cranelift/filetests/filetests/runtests/rotl.clif
@@ -117,6 +117,118 @@ block0(v0: i32, v1: i8):
; run: %rotl_i32_i8(0xe0000004, 66) == 0x80000013


function %rotl_i16_i64(i16, i64) -> i16 {
block0(v0: i16, v1: i64):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i64(0xe000, 0) == 0xe000
; run: %rotl_i16_i64(0xe000, 1) == 0xc001
; run: %rotl_i16_i64(0xef0f, 0) == 0xef0f
; run: %rotl_i16_i64(0xef0f, 4) == 0xf0fe
; run: %rotl_i16_i64(0xe004, 64) == 0xe004
; run: %rotl_i16_i64(0xe004, 65) == 0xc009
; run: %rotl_i16_i64(0xe004, 66) == 0x8013
; run: %rotl_i16_i64(0xe004, 257) == 0xc009

function %rotl_i16_i32(i16, i32) -> i16 {
block0(v0: i16, v1: i32):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i32(0xe000, 0) == 0xe000
; run: %rotl_i16_i32(0xe000, 1) == 0xc001
; run: %rotl_i16_i32(0xef0f, 0) == 0xef0f
; run: %rotl_i16_i32(0xef0f, 4) == 0xf0fe
; run: %rotl_i16_i32(0xe004, 64) == 0xe004
; run: %rotl_i16_i32(0xe004, 65) == 0xc009
; run: %rotl_i16_i32(0xe004, 66) == 0x8013
; run: %rotl_i16_i32(0xe004, 257) == 0xc009

function %rotl_i16_i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i16(0xe000, 0) == 0xe000
; run: %rotl_i16_i16(0xe000, 1) == 0xc001
; run: %rotl_i16_i16(0xef0f, 0) == 0xef0f
; run: %rotl_i16_i16(0xef0f, 4) == 0xf0fe
; run: %rotl_i16_i16(0xe004, 64) == 0xe004
; run: %rotl_i16_i16(0xe004, 65) == 0xc009
; run: %rotl_i16_i16(0xe004, 66) == 0x8013
; run: %rotl_i16_i16(0xe004, 257) == 0xc009

function %rotl_i16_i8(i16, i8) -> i16 {
block0(v0: i16, v1: i8):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i8(0xe000, 0) == 0xe000
; run: %rotl_i16_i8(0xe000, 1) == 0xc001
; run: %rotl_i16_i8(0xef0f, 0) == 0xef0f
; run: %rotl_i16_i8(0xef0f, 4) == 0xf0fe
; run: %rotl_i16_i8(0xe004, 64) == 0xe004
; run: %rotl_i16_i8(0xe004, 65) == 0xc009
; run: %rotl_i16_i8(0xe004, 66) == 0x8013


function %rotl_i8_i64(i8, i64) -> i8 {
block0(v0: i8, v1: i64):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i64(0xe0, 0) == 0xe0
; run: %rotl_i8_i64(0xe0, 1) == 0xc1
; run: %rotl_i8_i64(0xef, 0) == 0xef
; run: %rotl_i8_i64(0xef, 4) == 0xfe
; run: %rotl_i8_i64(0xe4, 64) == 0xe4
; run: %rotl_i8_i64(0xe4, 65) == 0xc9
; run: %rotl_i8_i64(0xe4, 66) == 0x93
; run: %rotl_i8_i64(0xe4, 257) == 0xc9

function %rotl_i8_i32(i8, i32) -> i8 {
block0(v0: i8, v1: i32):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i32(0xe0, 0) == 0xe0
; run: %rotl_i8_i32(0xe0, 1) == 0xc1
; run: %rotl_i8_i32(0xef, 0) == 0xef
; run: %rotl_i8_i32(0xef, 4) == 0xfe
; run: %rotl_i8_i32(0xe4, 64) == 0xe4
; run: %rotl_i8_i32(0xe4, 65) == 0xc9
; run: %rotl_i8_i32(0xe4, 66) == 0x93
; run: %rotl_i8_i32(0xe4, 257) == 0xc9

function %rotl_i8_i16(i8, i16) -> i8 {
block0(v0: i8, v1: i16):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i16(0xe0, 0) == 0xe0
; run: %rotl_i8_i16(0xe0, 1) == 0xc1
; run: %rotl_i8_i16(0xef, 0) == 0xef
; run: %rotl_i8_i16(0xef, 4) == 0xfe
; run: %rotl_i8_i16(0xe4, 64) == 0xe4
; run: %rotl_i8_i16(0xe4, 65) == 0xc9
; run: %rotl_i8_i16(0xe4, 66) == 0x93
; run: %rotl_i8_i16(0xe4, 257) == 0xc9

function %rotl_i8_i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i8(0xe0, 0) == 0xe0
; run: %rotl_i8_i8(0xe0, 1) == 0xc1
; run: %rotl_i8_i8(0xef, 0) == 0xef
; run: %rotl_i8_i8(0xef, 4) == 0xfe
; run: %rotl_i8_i8(0xe4, 64) == 0xe4
; run: %rotl_i8_i8(0xe4, 65) == 0xc9
; run: %rotl_i8_i8(0xe4, 66) == 0x93


;; This is a regression test for rotates on x64
;; See: https://github.com/bytecodealliance/wasmtime/pull/3610