Skip to content

Commit

Permalink
Implement vpmaxq_u8 on aarch64
Browse files Browse the repository at this point in the history
  • Loading branch information
bjorn3 committed Feb 21, 2025
1 parent b4998e8 commit cc3a305
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 13 deletions.
77 changes: 77 additions & 0 deletions src/shims/aarch64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use rustc_middle::mir::BinOp;
use rustc_middle::ty::Ty;
use rustc_span::Symbol;
use rustc_target::callconv::{Conv, FnAbi};

use crate::*;

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}

/// Shims for foreign items whose link name starts with `llvm.aarch64.`.
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Emulates the aarch64 LLVM intrinsic identified by `link_name`.
    ///
    /// `link_name` must start with `llvm.aarch64.`; this function panics otherwise.
    ///
    /// Returns `EmulateItemResult::NeedsReturn` once a recognised intrinsic has been
    /// emulated, and `EmulateItemResult::NotSupported` for any name not handled here.
    /// A recognised intrinsic invoked with an unsupported argument raises an
    /// "unsupported" interpreter error.
    fn emulate_aarch64_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: &FnAbi<'tcx, Ty<'tcx>>,
        args: &[OpTy<'tcx>],
        dest: &MPlaceTy<'tcx>,
    ) -> InterpResult<'tcx, EmulateItemResult> {
        let this = self.eval_context_mut();
        // The prefix is expected to have been verified before dispatching here,
        // so a missing prefix is a bug and the `unwrap` may panic.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.aarch64.").unwrap();
        match unprefixed_name {
            "isb" => {
                let [barrier] = this.check_shim(abi, Conv::C, link_name, args)?;
                let barrier_kind = this.read_scalar(barrier)?.to_i32()?;
                // Only SY ("full system scope"), encoded as 15, is supported.
                if barrier_kind != 15 {
                    throw_unsup_format!("unsupported llvm.aarch64.isb argument {}", barrier_kind);
                }
                this.yield_active_thread();
            }

            // Backs the `vpmaxq_u8` function: a folding maximum of adjacent pairs.
            // The first half of the destination lanes is produced from `left`,
            // the second half from `right`.
            // https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8
            "neon.umaxp.v16i8" => {
                let [left, right] = this.check_shim(abi, Conv::C, link_name, args)?;

                let (left, left_len) = this.project_to_simd(left)?;
                let (right, right_len) = this.project_to_simd(right)?;
                let (dest, lane_count) = this.project_to_simd(dest)?;
                assert_eq!(left_len, right_len);
                assert_eq!(lane_count, left_len);

                // Number of destination lanes fed by each source vector.
                let half = lane_count / 2;

                for lane_idx in 0..lane_count {
                    let src = if lane_idx < half { &left } else { &right };

                    // Position of this lane's pair inside `src`, and the indices of the
                    // two adjacent elements forming that pair.
                    // The lint is silenced as in the code this replaces; the indices
                    // derive from the lane count checked above.
                    #[allow(clippy::arithmetic_side_effects)]
                    let pair_idx = lane_idx % half;
                    #[allow(clippy::arithmetic_side_effects)]
                    let first_idx = pair_idx * 2;
                    #[allow(clippy::arithmetic_side_effects)]
                    let second_idx = pair_idx * 2 + 1;

                    let first_place = this.project_index(src, first_idx)?;
                    let lhs_lane = this.read_immediate(&first_place)?;
                    let second_place = this.project_index(src, second_idx)?;
                    let rhs_lane = this.read_immediate(&second_place)?;

                    // Keep the larger element of the pair; on equality the second is used.
                    let lhs_is_greater =
                        this.binary_op(BinOp::Gt, &lhs_lane, &rhs_lane)?.to_scalar().to_bool()?;
                    let res_lane = if lhs_is_greater { lhs_lane } else { rhs_lane };

                    let dest_lane = this.project_index(&dest, lane_idx)?;
                    this.write_immediate(*res_lane, &dest_lane)?;
                }
            }

            _ => return interp_ok(EmulateItemResult::NotSupported),
        }
        interp_ok(EmulateItemResult::NeedsReturn)
    }
}
18 changes: 5 additions & 13 deletions src/shims/foreign_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -939,20 +939,12 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
this, link_name, abi, args, dest,
);
}
// FIXME: Move these to an `arm` submodule.
"llvm.aarch64.isb" if this.tcx.sess.target.arch == "aarch64" => {
let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
let arg = this.read_scalar(arg)?.to_i32()?;
match arg {
// SY ("full system scope")
15 => {
this.yield_active_thread();
}
_ => {
throw_unsup_format!("unsupported llvm.aarch64.isb argument {}", arg);
}
}
name if name.starts_with("llvm.aarch64.") && this.tcx.sess.target.arch == "aarch64" => {
return shims::aarch64::EvalContextExt::emulate_aarch64_intrinsic(
this, link_name, abi, args, dest,
);
}
// FIXME: Move this to an `arm` submodule.
"llvm.arm.hint" if this.tcx.sess.target.arch == "arm" => {
let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
let arg = this.read_scalar(arg)?.to_i32()?;
Expand Down
1 change: 1 addition & 0 deletions src/shims/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![warn(clippy::arithmetic_side_effects)]

mod aarch64;
mod alloc;
mod backtrace;
mod files;
Expand Down

0 comments on commit cc3a305

Please sign in to comment.