Skip to content

Commit

Permalink
Add fallback implementation for popcnt
Browse files Browse the repository at this point in the history
  • Loading branch information
itsrainy committed Jun 12, 2023
1 parent 9b4a97e commit e42626d
Showing 1 changed file with 179 additions and 12 deletions.
191 changes: 179 additions & 12 deletions winch/codegen/src/isa/x64/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
use crate::{
isa::reg::Reg,
masm::{CalleeKind, CmpKind, DivKind, OperandSize, RemKind},
masm::{CalleeKind, CmpKind, DivKind, OperandSize, RegImm, RemKind},
};
use cranelift_codegen::{
entity::EntityRef,
ir::TrapCode,
ir::{ExternalName, Opcode, UserExternalNameRef},
isa::x64::{
args::{
self, AluRmiROpcode, Amode, CmpOpcode, DivSignedness, ExtMode, FromWritableReg, Gpr,
GprMem, GprMemImm, RegMem, RegMemImm, SyntheticAmode, WritableGpr, CC,
self, AluRmROpcode, AluRmiROpcode, Amode, CmpOpcode, DivSignedness, ExtMode,
FromWritableReg, Gpr, GprMem, GprMemImm, Imm8Gpr, Imm8Reg, RegMem, RegMemImm,
SyntheticAmode, WritableGpr, CC,
},
settings as x64_settings, CallInfo, EmitInfo, EmitState, Inst,
},
Expand Down Expand Up @@ -521,16 +522,182 @@ impl Assembler {
}

pub fn popcnt(&mut self, reg: Reg, size: OperandSize) {
assert!(
self.isa_flags.has_popcnt(),
"has_popcnt isa flag required for winch"
);
self.emit(Inst::UnaryRmR {
if self.isa_flags.has_popcnt() {
self.emit(Inst::UnaryRmR {
size: size.into(),
op: args::UnaryRmROpcode::Popcnt,
src: Gpr::new(reg.into()).unwrap().into(),
dst: Writable::from_reg(Gpr::new(reg.into()).unwrap()),
})
} else {
todo!("pick a fallback implementation");
}
}

/// Emits a software population count of `reg` that does not rely on the
/// `popcnt` instruction, leaving the result in `reg`.
///
/// The sequence runs one round per power of two (5 rounds for 32-bit
/// operands, 6 for 64-bit). Round `k` computes
/// `reg = ((reg >> 2^k) & mask_k) + (reg & mask_k)`, so neighbouring bit
/// groups are summed pairwise and the group width doubles each round.
///
/// NOTE(review): this assumes each `regs::scratch()` call yields a register
/// that does not alias the others. If it always returns the same register,
/// the temporaries below overwrite each other and the emitted sequence is
/// incorrect -- confirm before wiring this in.
fn popcnt_fallback1(&mut self, size: OperandSize, reg: Reg) {
    let hi_part = regs::scratch();
    let lo_part = regs::scratch();

    // One mask per round; mask `k` keeps the low half of every 2^(k+1)-bit group.
    let round_masks = [
        0x5555555555555555,
        0x3333333333333333,
        0x0f0f0f0f0f0f0f0f,
        0x00ff00ff00ff00ff,
        0x0000ffff0000ffff,
        0x00000000ffffffff,
    ];

    // log2 of the operand width in bits.
    let rounds = match size {
        OperandSize::S32 => 5usize,
        OperandSize::S64 => 6usize,
    };

    for (round, round_mask) in round_masks.iter().enumerate().take(rounds) {
        // Shift distance for this round: 1, 2, 4, 8, 16, (32).
        let distance = 1u8 << round;

        // hi_part = reg >> distance
        self.emit(Inst::ShiftR {
            size: size.into(),
            kind: args::ShiftKind::ShiftRightLogical,
            src: reg.into(),
            num_bits: Imm8Gpr::new(Imm8Reg::from(distance)).unwrap(),
            dst: hi_part.into(),
        });

        let mask = regs::scratch();
        self.load_constant(round_mask, mask, size);

        // hi_part &= mask
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::And,
            src1: hi_part.into(),
            src2: mask.into(),
            dst: hi_part.into(),
        });

        // lo_part = reg & mask
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::And,
            src1: reg.into(),
            src2: mask.into(),
            dst: lo_part.into(),
        });

        // reg = hi_part + lo_part
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Add,
            src1: hi_part.into(),
            src2: lo_part.into(),
            dst: reg.into(),
        });
    }
}

fn popcnt_fallback2(&mut self, size: OperandSize, reg: Reg) {
let shifted1 = regs::scratch();
self.emit(Inst::ShiftR {
size: size.into(),
kind: args::ShiftKind::ShiftRightLogical,
src: reg.into(),
num_bits: Imm8Gpr::new(Imm8Reg::from(1u8)).unwrap(),
dst: shifted1.into(),
});

let fives = regs::scratch();
self.load_constant(&0x5555555555555555, fives, size);
let masked1 = regs::scratch();
self.emit(Inst::AluRmiR {
size: size.into(),
op: AluRmiROpcode::And,
src1: shifted1.into(),
src2: fives.into(),
dst: masked1.into(),
});

let diff1 = regs::scratch();
self.emit(Inst::AluRmiR {
size: size.into(),
op: AluRmiROpcode::Sub,
src1: reg.into(),
src2: masked1.into(),
dst: reg.into(),
});

let threes = regs::scratch();
self.load_constant(&0x3333333333333333, threes, size);
let masked2 = regs::scratch();
self.emit(Inst::AluRmiR {
size: size.into(),
op: AluRmiROpcode::And,
src1: reg.into(),
src2: threes.into(),
dst: masked2.into(),
});

let shifted2 = regs::scratch();
self.emit(Inst::ShiftR {
size: size.into(),
kind: args::ShiftKind::ShiftRightLogical,
src: reg.into(),
num_bits: Imm8Gpr::new(Imm8Reg::from(2u8)).unwrap(),
dst: shifted2.into(),
});

let shifted2_and_masked = regs::scratch();
self.emit(Inst::AluRmiR {
size: size.into(),
op: AluRmiROpcode::And,
src1: shifted2.into(),
src2: threes.into(),
dst: shifted2_and_masked.into(),
});

self.emit(Inst::AluRmiR {
size: size.into(),
op: AluRmiROpcode::Add,
src1: masked2.into(),
src2: shifted2_and_masked.into(),
dst: reg.into(),
});

let shifted4 = regs::scratch();
self.emit(Inst::ShiftR {
size: size.into(),
kind: args::ShiftKind::ShiftRightLogical,
src: reg.into(),
num_bits: Imm8Gpr::new(Imm8Reg::from(4u8)).unwrap(),
dst: shifted4.into(),
});

let sum2 = regs::scratch();
self.emit(Inst::AluRmiR {
size: size.into(),
op: AluRmiROpcode::Add,
src1: reg.into(),
src2: shifted4.into(),
dst: reg.into(),
});

let ofof = regs::scratch();
self.load_constant(&0x0f0f0f0f0f0f0f0f, ofof, size);
self.emit(Inst::AluRmiR {
size: size.into(),
op: args::UnaryRmROpcode::Popcnt,
src: Gpr::new(reg.into()).unwrap().into(),
dst: Writable::from_reg(Gpr::new(reg.into()).unwrap()),
})
op: AluRmiROpcode::And,
src1: reg.into(),
src2: ofof.into(),
dst: reg.into(),
});

let ones = regs::scratch();
self.load_constant(&0x0101010101010101, ones, size);
self.emit(Inst::AluRmiR {
size: size.into(),
op: AluRmiROpcode::Mul,
src1: reg.into(),
src2: ones.into(),
dst: reg.into(),
});
self.emit(Inst::ShiftR {
size: size.into(),
kind: args::ShiftKind::ShiftRightLogical,
src: reg.into(),
num_bits: Imm8Gpr::new(Imm8Reg::from(56u8)).unwrap(),
dst: reg.into(),
});
}

/// Set value in dst to `0` or `1` based on flags in status register and
Expand Down

0 comments on commit e42626d

Please sign in to comment.