Skip to content

Commit

Permalink
arm64: Fold some constants into load instructions
Browse files Browse the repository at this point in the history
This changes the following:
  mov x0, #4
  ldr x0, [x1, x0]

Into:
  ldr x0, [x1, #4]

I noticed this pattern (but with #0), in a benchmark.

Copyright (c) 2020, Arm Limited.
  • Loading branch information
jgouly authored and cfallin committed Nov 30, 2020
1 parent 209270b commit f7227a1
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 7 deletions.
26 changes: 19 additions & 7 deletions cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -574,11 +574,11 @@ type AddressAddend64List = SmallVec<[Reg; 4]>;
/// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
/// do a zero-extension.
///
/// We do not descend further into the inputs of extensions, because supporting
/// (e.g.) a 32-bit add that is later extended would require additional masking
/// of high-order bits, which is too complex. So, in essence, we descend any
/// number of adds from the roots, collecting all 64-bit address addends; then
/// possibly support extensions at these leaves.
/// We do not descend further into the inputs of extensions (unless it is a constant),
/// because supporting (e.g.) a 32-bit add that is later extended would require
/// additional masking of high-order bits, which is too complex. So, in essence, we
/// descend any number of adds from the roots, collecting all 64-bit address addends;
/// then possibly support extensions at these leaves.
fn collect_address_addends<C: LowerCtx<I = Inst>>(
ctx: &mut C,
roots: &[InsnInput],
Expand Down Expand Up @@ -609,8 +609,20 @@ fn collect_address_addends<C: LowerCtx<I = Inst>>(
ExtendOp::SXTW
};
let extendee_input = InsnInput { insn, input: 0 };
let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
result32.push((reg, extendop));
// If the input is a zero-extension of a constant, add the value to the known
// offset.
// Only do this for zero-extension, as generating a sign-extended
// constant may be more instructions than using the 'SXTW' addressing mode.
if let (Some(insn), ExtendOp::UXTW) = (
maybe_input_insn(ctx, extendee_input, Opcode::Iconst),
extendop,
) {
let value = ctx.get_constant(insn).unwrap() as i64;
offset += value;
} else {
let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
result32.push((reg, extendop));
}
}
Opcode::Uextend | Opcode::Sextend => {
let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
Expand Down
30 changes: 30 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/amodes.clif
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,33 @@ block0(v0: i32, v1: i32):
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

; Loads an i32 from the address `v0 + uextend(iconst.i32 0)`.
; The expected output that follows is a plain `ldr w0, [x0]`: no separate
; instruction materializes the constant and no offset operand is emitted.
function %f16(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i32 0
v2 = uextend.i64 v1
v3 = load_complex.i32 v0+v2
return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

; Loads an i32 from the address `v0 + uextend(iconst.i32 4)`.
; The expected output that follows is `ldur w0, [x0, #4]`: the constant
; appears as the immediate offset of the load rather than in a register.
function %f17(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i32 4
v2 = uextend.i64 v1
v3 = load_complex.i32 v0+v2
return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldur w0, [x0, #4]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

0 comments on commit f7227a1

Please sign in to comment.