forked from bytecodealliance/wasmtime
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pulley: Slightly optimize bounds checks (bytecodealliance#10080)
* pulley: Slightly optimize bounds checks In profiling a module I noticed that the previous `xbc32_bound_trap` instruction wasn't being used when I expected. Investigation revealed that the load of the bound itself was GVN'd and deduplicated (yay!), but that meant the load was used in two locations and so it didn't pass the checks for `sinkable_load`. This commit fixes this by repurposing `xbc32_bound_trap` for "the bound is in a register" and renaming the previous instruction to `xbc32_boundne_trap`. This helps cut down on the number of opcodes in this benchmark and improves performance slightly. At the same time this tightens up "sinkable loads" to require native endianness, since that's how the bound of memory is stored. In addition to testing for `a < b` and optimizing that, this also now optimizes `b > a`, the same condition with the arguments swapped. * Fix some copy/paste typos
- Loading branch information
1 parent
2f27a10
commit 7d78789
Showing
9 changed files
with
329 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
test compile precise-output | ||
target pulley32 | ||
|
||
function %simple(i32, i32) { | ||
;; Single bounds check: the bound is loaded from v0+16, reduced by 24, and | ||
;; compared against v1, trapping with user1 when v1 is greater (unsigned). | ||
;; The load has exactly one use, and the expected output shows the load, | ||
;; subtract and compare lowered to a single `xbc32_boundne_trap`. | ||
block0(v0: i32, v1: i32): | ||
v2 = load.i32 v0+16 | ||
v3 = iconst.i32 24 | ||
v4 = isub v2, v3 | ||
v5 = icmp ugt v1, v4 | ||
trapnz v5, user1 | ||
return | ||
} | ||
|
||
; VCode: | ||
; block0: | ||
; xbc32_boundne_trap x1, x0, 16, 24 // trap=TrapCode(1) | ||
; ret | ||
; | ||
; Disassembled: | ||
; xbc32_boundne_trap x1, x0, 16, 24 | ||
; ret | ||
|
||
function %swapped_args(i32, i32) { | ||
;; Same check as a `ugt v1, v4` comparison but written with the operands | ||
;; swapped (`ult v4, v1`). The expected output shows this form also lowers | ||
;; to a single `xbc32_boundne_trap`. | ||
block0(v0: i32, v1: i32): | ||
v2 = load.i32 v0+16 | ||
v3 = iconst.i32 24 | ||
v4 = isub v2, v3 | ||
v5 = icmp ult v4, v1 | ||
trapnz v5, user1 | ||
return | ||
} | ||
|
||
; VCode: | ||
; block0: | ||
; xbc32_boundne_trap x1, x0, 16, 24 // trap=TrapCode(1) | ||
; ret | ||
; | ||
; Disassembled: | ||
; xbc32_boundne_trap x1, x0, 16, 24 | ||
; ret | ||
|
||
function %twice(i32, i32, i32) { | ||
;; Two bounds checks (v1 and v2) that share one loaded bound. Because the | ||
;; load has two uses, the expected output keeps it as a separate `xload32` | ||
;; and each check becomes an `xbc32_bound_trap` taking the bound in a | ||
;; register. | ||
block0(v0: i32, v1: i32, v2: i32): | ||
;; load the bound & calculate what to check against | ||
v3 = load.i32 v0+16 | ||
v4 = iconst.i32 24 | ||
v5 = isub v3, v4 | ||
|
||
;; check v1 | ||
v6 = icmp ugt v1, v5 | ||
trapnz v6, user1 | ||
|
||
;; check v2 | ||
v7 = icmp ugt v2, v5 | ||
trapnz v7, user1 | ||
|
||
return | ||
} | ||
|
||
; VCode: | ||
; block0: | ||
; x4 = xload32 x0+16 // flags = | ||
; xbc32_bound_trap x1, x4, 24 // trap=TrapCode(1) | ||
; xbc32_bound_trap x2, x4, 24 // trap=TrapCode(1) | ||
; ret | ||
; | ||
; Disassembled: | ||
; xload32le_offset8 x4, x0, 16 | ||
; xbc32_bound_trap x1, x4, 24 | ||
; xbc32_bound_trap x2, x4, 24 | ||
; ret | ||
|
||
|
||
function %twice_swapped(i32, i32, i32) { | ||
;; Two bounds checks (v1 and v2) sharing one loaded bound, with each | ||
;; comparison written in the swapped `ult bound, addr` form. Both checks | ||
;; are expected to lower to `xbc32_bound_trap` with the bound in a register. | ||
block0(v0: i32, v1: i32, v2: i32): | ||
;; load the bound & calculate what to check against | ||
v3 = load.i32 v0+16 | ||
v4 = iconst.i32 24 | ||
v5 = isub v3, v4 | ||
|
||
;; check v1 | ||
v6 = icmp ult v5, v1 | ||
trapnz v6, user1 | ||
|
||
;; check v2 | ||
v7 = icmp ult v5, v2 | ||
trapnz v7, user1 | ||
|
||
return | ||
} | ||
|
||
; VCode: | ||
; block0: | ||
; x4 = xload32 x0+16 // flags = | ||
; xbc32_bound_trap x1, x4, 24 // trap=TrapCode(1) | ||
; xbc32_bound_trap x2, x4, 24 // trap=TrapCode(1) | ||
; ret | ||
; | ||
; Disassembled: | ||
; xload32le_offset8 x4, x0, 16 | ||
; xbc32_bound_trap x1, x4, 24 | ||
; xbc32_bound_trap x2, x4, 24 | ||
; ret | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
test compile precise-output | ||
target pulley64 | ||
|
||
function %simple(i64, i32) { | ||
;; Single bounds check on a 64-bit target: the i64 bound is loaded from | ||
;; v0+16 and reduced by 24, and the i32 address v1 is zero-extended before | ||
;; the unsigned comparison. The expected output shows the whole sequence | ||
;; lowered to a single `xbc32_boundne_trap`. | ||
block0(v0: i64, v1: i32): | ||
v2 = load.i64 v0+16 | ||
v3 = uextend.i64 v1 | ||
v4 = iconst.i64 24 | ||
v5 = isub v2, v4 | ||
v6 = icmp ugt v3, v5 | ||
trapnz v6, user1 | ||
return | ||
} | ||
|
||
; VCode: | ||
; block0: | ||
; xbc32_boundne_trap x1, x0, 16, 24 // trap=TrapCode(1) | ||
; ret | ||
; | ||
; Disassembled: | ||
; xbc32_boundne_trap x1, x0, 16, 24 | ||
; ret | ||
|
||
function %swapped_args(i64, i32) { | ||
;; Same check as a `ugt v3, v5` comparison but with the operands swapped | ||
;; (`ult v5, v3`). The expected output shows this form also lowers to a | ||
;; single `xbc32_boundne_trap`. | ||
block0(v0: i64, v1: i32): | ||
v2 = load.i64 v0+16 | ||
v3 = uextend.i64 v1 | ||
v4 = iconst.i64 24 | ||
v5 = isub v2, v4 | ||
v6 = icmp ult v5, v3 | ||
trapnz v6, user1 | ||
return | ||
} | ||
|
||
; VCode: | ||
; block0: | ||
; xbc32_boundne_trap x1, x0, 16, 24 // trap=TrapCode(1) | ||
; ret | ||
; | ||
; Disassembled: | ||
; xbc32_boundne_trap x1, x0, 16, 24 | ||
; ret | ||
|
||
function %twice(i64, i32, i32) { | ||
;; Two bounds checks (v1 and v2) that share one loaded i64 bound. Because | ||
;; the load has two uses, the expected output keeps it as a separate | ||
;; `xload64` and each check becomes an `xbc32_bound_trap` taking the bound | ||
;; in a register. | ||
block0(v0: i64, v1: i32, v2: i32): | ||
;; load the bound & calculate what to check against | ||
v3 = load.i64 v0+16 | ||
v4 = iconst.i64 24 | ||
v5 = isub v3, v4 | ||
|
||
;; check v1 | ||
v6 = uextend.i64 v1 | ||
v7 = icmp ugt v6, v5 | ||
trapnz v7, user1 | ||
|
||
;; check v2 | ||
v8 = uextend.i64 v2 | ||
v9 = icmp ugt v8, v5 | ||
trapnz v9, user1 | ||
|
||
return | ||
} | ||
|
||
; VCode: | ||
; block0: | ||
; x4 = xload64 x0+16 // flags = | ||
; xbc32_bound_trap x1, x4, 24 // trap=TrapCode(1) | ||
; xbc32_bound_trap x2, x4, 24 // trap=TrapCode(1) | ||
; ret | ||
; | ||
; Disassembled: | ||
; xload64le_offset8 x4, x0, 16 | ||
; xbc32_bound_trap x1, x4, 24 | ||
; xbc32_bound_trap x2, x4, 24 | ||
; ret | ||
|
||
function %twice_swapped(i64, i32, i32) { | ||
;; Two bounds checks (v1 and v2) sharing one loaded i64 bound, with each | ||
;; comparison written in the swapped `ult bound, addr` form. Both checks | ||
;; are expected to lower to `xbc32_bound_trap` with the bound in a register. | ||
block0(v0: i64, v1: i32, v2: i32): | ||
;; load the bound & calculate what to check against | ||
v3 = load.i64 v0+16 | ||
v4 = iconst.i64 24 | ||
v5 = isub v3, v4 | ||
|
||
;; check v1 | ||
v6 = uextend.i64 v1 | ||
v7 = icmp ult v5, v6 | ||
trapnz v7, user1 | ||
|
||
;; check v2 | ||
v8 = uextend.i64 v2 | ||
v9 = icmp ult v5, v8 | ||
trapnz v9, user1 | ||
|
||
return | ||
} | ||
|
||
; VCode: | ||
; block0: | ||
; x4 = xload64 x0+16 // flags = | ||
; xbc32_bound_trap x1, x4, 24 // trap=TrapCode(1) | ||
; xbc32_bound_trap x2, x4, 24 // trap=TrapCode(1) | ||
; ret | ||
; | ||
; Disassembled: | ||
; xload64le_offset8 x4, x0, 16 | ||
; xbc32_bound_trap x1, x4, 24 | ||
; xbc32_bound_trap x2, x4, 24 | ||
; ret | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.