Port AvgRound & SqmulRoundSat to ISLE (AArch64) (#4639)
Ported the existing implementations of the following opcodes on AArch64
to ISLE:
- `AvgRound`
  - Also introduced support for `i64x2` vectors, as per the docs.
- `SqmulRoundSat`

Copyright (c) 2022 Arm Limited
dheaton-arm authored Aug 8, 2022
1 parent 47a67d7 commit e463890
Showing 9 changed files with 369 additions and 56 deletions.
2 changes: 2 additions & 0 deletions cranelift/codegen/meta/src/shared/instructions.rs
@@ -592,6 +592,8 @@ fn define_simd_arithmetic(
"avg_round",
r#"
Unsigned average with rounding: `a := (x + y + 1) // 2`
The addition does not lose any information (such as from overflow).
"#,
&formats.binary,
)
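The new doc line is the whole contract: the rounded average is defined as if the sum were computed exactly, so operands near the lane maximum must not wrap. A throwaway scalar sketch of that behaviour (illustrative code, not part of this change):

```rust
/// Unsigned average with rounding for one 8-bit lane, `a := (x + y + 1) // 2`,
/// with the sum formed in a wider type so it cannot overflow.
fn avg_round_u8(x: u8, y: u8) -> u8 {
    ((x as u16 + y as u16 + 1) / 2) as u8
}

fn main() {
    // 230 + 250 would wrap in u8 arithmetic, but the rounded average is still 240.
    assert_eq!(avg_round_u8(230, 250), 240);
    assert_eq!(avg_round_u8(u8::MAX, u8::MAX), u8::MAX);
}
```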
7 changes: 7 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1551,6 +1551,13 @@
(_ Unit (emit (MInst.VecLanes op dst src size))))
dst))

;; Helper for emitting `MInst.VecShiftImm` instructions.
(decl vec_shift_imm (VecShiftImmOp u8 Reg VectorSize) Reg)
(rule (vec_shift_imm op imm src size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecShiftImm op dst src size imm))))
dst))

;; Helper for emitting `MInst.VecDup` instructions.
(decl vec_dup (Reg VectorSize) Reg)
(rule (vec_dup src size)
29 changes: 16 additions & 13 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1976,31 +1976,34 @@ impl MachInstEmit for Inst {
             } => {
                 let rd = allocs.next_writable(rd);
                 let rn = allocs.next(rn);
-                let (is_shr, template) = match op {
-                    VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
-                    VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
-                    VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
+                let (is_shr, mut template) = match op {
+                    VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
+                    VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
+                    VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
                 };
+                if size.is_128bits() {
+                    template |= 0b1 << 30;
+                }
                 let imm = imm as u32;
                 // Deal with the somewhat strange encoding scheme for, and limits on,
                 // the shift amount.
-                let immh_immb = match (size, is_shr) {
-                    (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
+                let immh_immb = match (size.lane_size(), is_shr) {
+                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
                         0b_1000_000_u32 | (64 - imm)
                     }
-                    (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
+                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
                         0b_0100_000_u32 | (32 - imm)
                     }
-                    (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
+                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
                         0b_0010_000_u32 | (16 - imm)
                     }
-                    (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
+                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
                         0b_0001_000_u32 | (8 - imm)
                     }
-                    (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
-                    (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
-                    (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
-                    (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
+                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
+                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
+                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
+                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                     _ => panic!(
                         "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
                         op, size, imm
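For reference, the `immh:immb` field being built here folds the lane size and the shift amount into seven bits: right shifts store `2 * lane_bits - shift`, left shifts store `lane_bits + shift`, and the position of the leading one bit selects the lane size. A standalone sketch of that arithmetic (the helper name and harness are illustrative, not repository code); the two assertions correspond to test vectors added below in `emit_tests.rs`:

```rust
/// Compute the 7-bit immh:immb field for an AArch64 SIMD shift-by-immediate.
/// `lane_bits` must be 8, 16, 32 or 64.
fn immh_immb(lane_bits: u32, shift: u32, is_shr: bool) -> u32 {
    assert!(matches!(lane_bits, 8 | 16 | 32 | 64));
    if is_shr {
        // Right shifts by 1..=lane_bits encode as 2*lane_bits - shift.
        assert!(shift >= 1 && shift <= lane_bits);
        lane_bits | (lane_bits - shift)
    } else {
        // Left shifts by 0..lane_bits encode as lane_bits + shift.
        assert!(shift < lane_bits);
        lane_bits | shift
    }
}

fn main() {
    // `ushr v25.2d, v6.2d, #64` -> immh:immb = 0b1000000
    assert_eq!(immh_immb(64, 64, true), 0b100_0000);
    // `ushr v5.4h, v21.4h, #1` -> immh:immb = 0b0011111
    assert_eq!(immh_immb(16, 1, true), 0b001_1111);
}
```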
180 changes: 180 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3946,6 +3946,18 @@ fn test_aarch64_binemit() {
"smax v8.4s, v12.4s, v14.4s",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
rd: writable_vreg(8),
rn: vreg(1),
rm: vreg(3),
size: VectorSize::Size8x8,
},
"2814232E",
"urhadd v8.8b, v1.8b, v3.8b",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
@@ -3958,6 +3970,18 @@
"urhadd v8.16b, v1.16b, v3.16b",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
rd: writable_vreg(2),
rn: vreg(13),
rm: vreg(6),
size: VectorSize::Size16x4,
},
"A215662E",
"urhadd v2.4h, v13.4h, v6.4h",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
@@ -3970,6 +3994,18 @@
"urhadd v2.8h, v13.8h, v6.8h",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
rd: writable_vreg(8),
rn: vreg(12),
rm: vreg(14),
size: VectorSize::Size32x2,
},
"8815AE2E",
"urhadd v8.2s, v12.2s, v14.2s",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
@@ -5123,6 +5159,126 @@ fn test_aarch64_binemit() {
"sshr v3.8h, v19.8h, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 8,
size: VectorSize::Size8x8,
},
"D904082F",
"ushr v25.8b, v6.8b, #8",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size8x8,
},
"A5060F2F",
"ushr v5.8b, v21.8b, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 8,
size: VectorSize::Size8x16,
},
"D904086F",
"ushr v25.16b, v6.16b, #8",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size8x16,
},
"A5060F6F",
"ushr v5.16b, v21.16b, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 16,
size: VectorSize::Size16x4,
},
"D904102F",
"ushr v25.4h, v6.4h, #16",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size16x4,
},
"A5061F2F",
"ushr v5.4h, v21.4h, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 16,
size: VectorSize::Size16x8,
},
"D904106F",
"ushr v25.8h, v6.8h, #16",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size16x8,
},
"A5061F6F",
"ushr v5.8h, v21.8h, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 32,
size: VectorSize::Size32x2,
},
"D904202F",
"ushr v25.2s, v6.2s, #32",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size32x2,
},
"A5063F2F",
"ushr v5.2s, v21.2s, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
@@ -5147,6 +5303,30 @@
"ushr v5.4s, v21.4s, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 64,
size: VectorSize::Size64x2,
},
"D904406F",
"ushr v25.2d, v6.2d, #64",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size64x2,
},
"A5067F6F",
"ushr v5.2d, v21.2d, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Shl,
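Each tuple pushed above is `(instruction, expected bytes, expected disassembly)`, where the byte string is hex in the order the bytes are emitted; since AArch64 instructions are little-endian 32-bit words, a vector can be folded back into one word to inspect encoding fields. A small throwaway check (not part of the test suite) against `ushr v25.2d, v6.2d, #64`:

```rust
/// Turn an emit-test hex string (bytes in memory order) back into the
/// 32-bit little-endian instruction word it encodes.
fn word_from_test_hex(hex: &str) -> u32 {
    let bytes: Vec<u8> = (0..hex.len())
        .step_by(2)
        .map(|i| u8::from_str_radix(&hex[i..i + 2], 16).unwrap())
        .collect();
    u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
}

fn main() {
    let word = word_from_test_hex("D904406F");
    assert_eq!(word, 0x6F40_04D9);
    // Bits 22..16 are immh:immb: 0b1000000, i.e. a 64-bit lane shifted right by 64.
    assert_eq!((word >> 16) & 0x7F, 0b100_0000);
}
```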
21 changes: 21 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
@@ -233,6 +233,27 @@
(rule (lower (has_type (fits_in_32 ty) (iabs x)))
(abs (OperandSize.Size32) (put_in_reg_sext32 x)))

;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I64X2 (avg_round x y)))
(let ((one Reg (splat_const 1 (VectorSize.Size64x2)))
(c Reg (orr_vec x y (VectorSize.Size64x2)))
(c Reg (and_vec c one (VectorSize.Size64x2)))
(x Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 x
(VectorSize.Size64x2)))
(y Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 y
(VectorSize.Size64x2)))
(sum Reg (add_vec x y (VectorSize.Size64x2))))
(add_vec c sum (VectorSize.Size64x2))))

(rule (lower (has_type (lane_fits_in_32 ty) (avg_round x y)))
(vec_rrr (VecALUOp.Urhadd) x y (vector_size ty)))

;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _ _) (sqmul_round_sat x y)))
(vec_rrr (VecALUOp.Sqrdmulh) x y (vector_size ty)))

;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _ _) (fadd rn rm)))
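The `$I64X2` rule above exists because `URHADD` only covers 8-, 16- and 32-bit lanes, so the 64-bit case is rewritten into the overflow-free identity `(x >> 1) + (y >> 1) + ((x | y) & 1)`; the `sqmul_round_sat` rule maps straight onto `SQRDMULH`. A scalar sketch of both lowerings (illustrative names, not repository code):

```rust
/// Reference semantics: unsigned rounding average with the sum taken exactly.
fn avg_round_u64_ref(x: u64, y: u64) -> u64 {
    ((x as u128 + y as u128 + 1) / 2) as u64
}

/// The decomposition the ISLE rule emits: halve each operand, then add back the
/// rounding carry `(x | y) & 1`. No intermediate result exceeds 64 bits.
fn avg_round_u64_lowered(x: u64, y: u64) -> u64 {
    (x >> 1) + (y >> 1) + ((x | y) & 1)
}

/// Scalar model of SQRDMULH on a 16-bit lane: saturating rounding doubling
/// multiply returning the high half, i.e. Q15 fixed-point multiplication.
fn sqrdmulh_i16(a: i16, b: i16) -> i16 {
    let prod = 2 * (a as i64) * (b as i64) + (1 << 15);
    (prod >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
}

fn main() {
    for &(x, y) in &[(0, 0), (1, 2), (u64::MAX, 0), (u64::MAX, u64::MAX)] {
        assert_eq!(avg_round_u64_ref(x, y), avg_round_u64_lowered(x, y));
    }
    // 0.5 * 0.5 in Q15 is 0.25; the only saturating case is -1.0 * -1.0.
    assert_eq!(sqrdmulh_i16(16384, 16384), 8192);
    assert_eq!(sqrdmulh_i16(i16::MIN, i16::MIN), i16::MAX);
}
```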
45 changes: 2 additions & 43 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1502,27 +1502,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}

Opcode::Iabs => implemented_in_isle(ctx),
-        Opcode::AvgRound => {
-            let ty = ty.unwrap();
-
-            if ty.lane_bits() == 64 {
-                return Err(CodegenError::Unsupported(format!(
-                    "AvgRound: Unsupported type: {:?}",
-                    ty
-                )));
-            }
-
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            ctx.emit(Inst::VecRRR {
-                alu_op: VecALUOp::Urhadd,
-                rd,
-                rn,
-                rm,
-                size: VectorSize::from_ty(ty),
-            });
-        }
+        Opcode::AvgRound => implemented_in_isle(ctx),

Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => implemented_in_isle(ctx),

@@ -1583,28 +1563,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
},

-        Opcode::SqmulRoundSat => {
-            let ty = ty.unwrap();
-
-            if !ty.is_vector() || (ty.lane_type() != I16 && ty.lane_type() != I32) {
-                return Err(CodegenError::Unsupported(format!(
-                    "SqmulRoundSat: Unsupported type: {:?}",
-                    ty
-                )));
-            }
-
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-
-            ctx.emit(Inst::VecRRR {
-                alu_op: VecALUOp::Sqrdmulh,
-                rd,
-                rn,
-                rm,
-                size: VectorSize::from_ty(ty),
-            });
-        }
+        Opcode::SqmulRoundSat => implemented_in_isle(ctx),

Opcode::FcvtLowFromSint => {
let ty = ty.unwrap();
