Skip to content

Commit

Permalink
[llvm][arm] add T1 and T2 assembly options for vlldm and vlstm (llvm#…
Browse files Browse the repository at this point in the history
…83116)

T1 allows for an optional registers list, the register list must be {d0-d15}.
T2 defines a mandatory register list, the register list must be {d0-d31}.

The requirements for T1/T2 are as follows:
                T1              T2
Require:        v8-M.Main,      v8.1-M.Main,
                secure state    secure state
16 D Regs       valid           valid
32 D Regs       UNDEFINED       valid
No D Regs       NOP             NOP
  • Loading branch information
sivan-shani authored Feb 28, 2024
1 parent 2640277 commit 634b024
Show file tree
Hide file tree
Showing 15 changed files with 362 additions and 61 deletions.
57 changes: 37 additions & 20 deletions llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1468,15 +1468,21 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
if (passesFPReg)
assert(STI->hasFPRegs() && "Subtarget needs fpregs");

// Lazy store all fp registers to the stack.
// Lazy store all fp registers to the stack
// This executes as NOP in the absence of floating-point support.
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
VLSTM.addReg(R, RegState::Implicit |
(LiveRegs.contains(R) ? 0 : RegState::Undef));
// Build the VLSTM with SP as its base register and the AL predicate, followed
// by a placeholder immediate that stands in for the register-list operand.
MachineInstrBuilder VLSTM =
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL))
.addImm(0); // Represents a pseudo register list, has no effect on
// the encoding.
// Mark non-live registers as undef
// Every implicit register operand that is not a def gets its undef flag set
// to "register is absent from LiveRegs".
for (MachineOperand &MO : VLSTM->implicit_operands()) {
if (MO.isReg() && !MO.isDef()) {
Register Reg = MO.getReg();
MO.setIsUndef(!LiveRegs.contains(Reg));
}
}

// Restore all arguments
for (const auto &Regs : ClearedFPRegs) {
Expand Down Expand Up @@ -1563,14 +1569,20 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
.addImm(CMSE_FP_SAVE_SIZE >> 2)
.add(predOps(ARMCC::AL));

// Lazy store all FP registers to the stack
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
VLSTM.addReg(R, RegState::Implicit |
(LiveRegs.contains(R) ? 0 : RegState::Undef));
// Lazy store all fp registers to the stack.
// The VLSTM takes SP as its base register and the AL predicate, followed by a
// placeholder immediate that stands in for the register-list operand.
MachineInstrBuilder VLSTM =
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL))
.addImm(0); // Represents a pseudo register list, has no effect on
// the encoding.
// Mark non-live registers as undef
// Every implicit register operand that is not a def gets its undef flag set
// to "register is absent from LiveRegs".
// NOTE(review): the loop already iterates implicit_operands(), so the
// isImplicit() test below looks redundant (CMSESaveClearFPRegsV8 omits it) —
// confirm before simplifying.
for (MachineOperand &MO : VLSTM->implicit_operands()) {
if (MO.isReg() && MO.isImplicit() && !MO.isDef()) {
Register Reg = MO.getReg();
MO.setIsUndef(!LiveRegs.contains(Reg));
}
}
} else {
// Push all the callee-saved registers (s16-s31).
MachineInstrBuilder VPUSH =
Expand Down Expand Up @@ -1673,9 +1685,12 @@ void ARMExpandPseudo::CMSERestoreFPRegsV8(

// Lazy load fp regs from stack.
// This executes as NOP in the absence of floating-point support.
MachineInstrBuilder VLLDM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
// Build the VLLDM with SP as its base register and the AL predicate, followed
// by a placeholder immediate that stands in for the register-list operand.
MachineInstrBuilder VLLDM =
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL))
.addImm(0); // Represents a pseudo register list, has no effect on
// the encoding.

if (STI->fixCMSE_CVE_2021_35465()) {
auto Bundler = MIBundleBuilder(MBB, VLLDM);
Expand Down Expand Up @@ -1757,7 +1772,9 @@ void ARMExpandPseudo::CMSERestoreFPRegsV81(
// Load FP registers from stack.
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
.add(predOps(ARMCC::AL))
.addImm(0); // Represents a pseudo register list, has no effect on the
// encoding.

// Pop the stack space
BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
Expand Down
31 changes: 31 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -1749,6 +1749,37 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let Inst{8} = 0; // Single precision
}

// Single-precision VFP load/store-multiple format with a fixed register set.
// Intended for instructions whose transferred registers are implied by the
// instruction itself, e.g. VLLDM and VLSTM.
//   asm  - assembly string of the instruction.
//   et   - encoding type placed in Inst{7}: 0 for T1, 1 for T2.
//   load - placed in Inst{20}; 1 marks a load (vlldm), 0 a store (vlstm).
class AXSI4FR<string asm, bit et, bit load>
  : InstARM<AddrMode4, 4, IndexModeNone, VFPLdStMulFrm, VFPDomain, "", NoItinerary> {
  // Operand fields.
  bits<4> Rn;
  bits<13> regs; // Assembly/disassembly only, never encoded.

  list<Predicate> Predicates = [HasVFP2];
  let OutOperandList = (outs);
  let InOperandList = (ins GPRnopc:$Rn, pred:$p, dpr_reglist:$regs);
  let AsmString = asm;
  let Pattern = [];
  let DecoderNamespace = "VFP";

  // Memory behaviour follows the load/store selector.
  let mayLoad = load;
  let mayStore = !eq(load, 0);

  // Encoding, listed from the most significant bit downwards.
  let Inst{31-28} = 0b1110;
  let Inst{27-25} = 0b110;
  let Inst{24-21} = 0b0001;
  let Inst{20} = load;          // Distinguishes vlldm from vlstm.
  let Inst{19-16} = Rn;
  let Inst{15-12} = 0b0000;
  let Inst{11-9} = 0b101;
  let Inst{8} = 0;              // Single precision.
  let Inst{7} = et;             // Encoding type, 0 for T1 and 1 for T2.
  let Inst{6-0} = 0b0000000;
}

// Double precision, unary
class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
Expand Down
64 changes: 43 additions & 21 deletions llvm/lib/Target/ARM/ARMInstrVFP.td
Original file line number Diff line number Diff line change
Expand Up @@ -313,29 +313,51 @@ def : MnemonicAlias<"vstm", "vstmia">;
//===----------------------------------------------------------------------===//
// Lazy load / store multiple Instructions
//
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlldm${p}\t$Rn", "", []>,
// VLLDM and VLSTM:
// 2 encoding options:
// T1 (bit 7 is 0):
// T1 takes an optional dpr_reglist, which must be exactly '{d0-d15}'
// T1 requires v8-M.Main, secure state, and a target with 16 D registers (or with no D registers - NOP)
// T2 (bit 7 is 1):
// T2 takes a mandatory dpr_reglist, which must be exactly '{d0-d31}'
// T2 requires v8.1-M.Main, secure state, and a target with 16/32 D registers (or with no D registers - NOP)
// (source: Arm v8-M ARM, DDI0553B.v ID16122022)

def VLLDM : AXSI4FR<"vlldm${p}\t$Rn, $regs", 0, 1>,
Requires<[HasV8MMainline, Has8MSecExt]> {
let Inst{24-23} = 0b00;
let Inst{22} = 0;
let Inst{21} = 1;
let Inst{20} = 1;
let Inst{15-12} = 0;
let Inst{7-0} = 0;
let mayLoad = 1;
let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV];
}

def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlstm${p}\t$Rn", "", []>,
let Defs = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15];
let DecoderMethod = "DecodeLazyLoadStoreMul";
}
// T1: assembly does not contain the register list.
// The literal 0 fills VLLDM's third operand ($regs), which this syntax omits.
def : InstAlias<"vlldm${p}\t$Rn", (VLLDM GPRnopc:$Rn, pred:$p, 0)>,
Requires<[HasV8MMainline, Has8MSecExt]>;
// T2: assembly must contain the register list.
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
// Instantiates AXSI4FR with et = 1 (T2 encoding) and load = 1 (vlldm).
def VLLDM_T2 : AXSI4FR<"vlldm${p}\t$Rn, $regs", 1, 1>,
Requires<[HasV8_1MMainline, Has8MSecExt]> {
// Implicitly defined registers: VPR, FPSCR, FPSCR_NZCV and D0-D31.
let Defs = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31];
// Name of the custom decoder method used for this instruction.
let DecoderMethod = "DecodeLazyLoadStoreMul";
}
// T1: assembly contains the register list.
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
def VLSTM : AXSI4FR<"vlstm${p}\t$Rn, $regs", 0, 0>,
Requires<[HasV8MMainline, Has8MSecExt]> {
let Inst{24-23} = 0b00;
let Inst{22} = 0;
let Inst{21} = 1;
let Inst{20} = 0;
let Inst{15-12} = 0;
let Inst{7-0} = 0;
let mayStore = 1;
let Defs = [VPR, FPSCR, FPSCR_NZCV];
let Uses = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15];
let DecoderMethod = "DecodeLazyLoadStoreMul";
}
// T1: assembly does not contain the register list.
// The literal 0 fills VLSTM's third operand ($regs), which this syntax omits.
def : InstAlias<"vlstm${p}\t$Rn", (VLSTM GPRnopc:$Rn, pred:$p, 0)>,
Requires<[HasV8MMainline, Has8MSecExt]>;
// T2: assembly must contain the register list.
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
// Instantiates AXSI4FR with et = 1 (T2 encoding) and load = 0 (vlstm).
def VLSTM_T2 : AXSI4FR<"vlstm${p}\t$Rn, $regs", 1, 0>,
Requires<[HasV8_1MMainline, Has8MSecExt]> {
// Implicitly defined registers: VPR, FPSCR and FPSCR_NZCV.
let Defs = [VPR, FPSCR, FPSCR_NZCV];
// Implicitly used registers: VPR, FPSCR, FPSCR_NZCV and D0-D31.
let Uses = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31];
// Name of the custom decoder method used for this instruction.
let DecoderMethod = "DecodeLazyLoadStoreMul";
}

def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>,
Expand Down
76 changes: 67 additions & 9 deletions llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,12 @@ class ARMAsmParser : public MCTargetAsmParser {
bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands,
unsigned ListNo);

int tryParseRegister();
/// Try to parse a register name; returns the register number, or -1 when the
/// current token is not a register. When \p AllowOutOfBoundReg is true,
/// D16-D31 are accepted even if the target does not have 32 D registers.
int tryParseRegister(bool AllowOutOfBoundReg = false);
bool tryParseRegisterWithWriteBack(OperandVector &);
int tryParseShiftRegister(OperandVector &);
bool parseRegisterList(OperandVector &, bool EnforceOrder = true,
bool AllowRAAC = false);
bool AllowRAAC = false,
bool AllowOutOfBoundReg = false);
bool parseMemory(OperandVector &);
bool parseOperand(OperandVector &, StringRef Mnemonic);
bool parseImmExpr(int64_t &Out);
Expand Down Expand Up @@ -4072,7 +4073,7 @@ ParseStatus ARMAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
/// Try to parse a register name. The token must be an Identifier when called,
/// and if it is a register name the token is eaten and the register number is
/// returned. Otherwise return -1.
int ARMAsmParser::tryParseRegister() {
int ARMAsmParser::tryParseRegister(bool AllowOutOfBoundReg) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
Expand Down Expand Up @@ -4116,7 +4117,8 @@ int ARMAsmParser::tryParseRegister() {
}

// Some FPUs only have 16 D registers, so D16-D31 are invalid
if (!hasD32() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
if (!AllowOutOfBoundReg && !hasD32() && RegNum >= ARM::D16 &&
RegNum <= ARM::D31)
return -1;

Parser.Lex(); // Eat identifier token.
Expand Down Expand Up @@ -4456,7 +4458,7 @@ insertNoDuplicates(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,

/// Parse a register list.
bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
bool AllowRAAC) {
bool AllowRAAC, bool AllowOutOfBoundReg) {
MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::LCurly))
return TokError("Token is not a Left Curly Brace");
Expand Down Expand Up @@ -4510,7 +4512,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
return Error(RegLoc, "pseudo-register not allowed");
Parser.Lex(); // Eat the minus.
SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
int EndReg = tryParseRegister(AllowOutOfBoundReg);
if (EndReg == -1)
return Error(AfterMinusLoc, "register expected");
if (EndReg == ARM::RA_AUTH_CODE)
Expand Down Expand Up @@ -4545,7 +4547,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
Reg = tryParseRegister(AllowOutOfBoundReg);
if (Reg == -1)
return Error(RegLoc, "register expected");
if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE)
Expand Down Expand Up @@ -6085,8 +6087,11 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
}
case AsmToken::LBrac:
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands, !Mnemonic.starts_with("clr"));
case AsmToken::LCurly: {
bool AllowOutOfBoundReg = Mnemonic == "vlldm" || Mnemonic == "vlstm";
return parseRegisterList(Operands, !Mnemonic.starts_with("clr"), false,
AllowOutOfBoundReg);
}
case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate
Expand Down Expand Up @@ -7596,6 +7601,33 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,

const unsigned Opcode = Inst.getOpcode();
switch (Opcode) {
case ARM::VLLDM:
case ARM::VLLDM_T2:
case ARM::VLSTM:
case ARM::VLSTM_T2: {
  // Since in some cases both T1 and T2 are valid, tablegen can not always
  // pick the correct instruction, so the optional register list is validated
  // here. Returns true (after reporting an error) when the list is rejected
  // and false when the instruction is acceptable.
  if (Operands.size() == 4) { // a register list has been provided
    ARMOperand &Op = static_cast<ARMOperand &>(
        *Operands[3]); // the register list, a dpr_reglist
    assert(Op.isDPRRegList());
    auto &RegList = Op.getRegList();
    // T2 requires v8.1-M.Main (cannot be handled by tablegen)
    if (RegList.size() == 32 && !hasV8_1MMainline()) {
      return Error(Op.getEndLoc(), "T2 version requires v8.1-M.Main");
    }
    // When target has 32 D registers, T1 is undefined.
    if (hasD32() && RegList.size() != 32) {
      return Error(Op.getEndLoc(), "operand must be exactly {d0-d31}");
    }
    // When target has 16 D registers, both T1 and T2 are valid.
    // The size alone is not enough: out-of-bound D registers are accepted
    // while parsing these mnemonics, so a 16-entry list such as {d1-d16}
    // would otherwise pass even though the diagnostic promises "exactly
    // {d0-d15}". Require the list to start at d0 as well.
    // NOTE(review): relies on the register list being stored in ascending,
    // contiguous order (order is enforced for non-"clr" mnemonics) — confirm
    // against parseRegisterList.
    const bool StartsAtD0 = !RegList.empty() && RegList.front() == ARM::D0;
    const bool HasValidSize = RegList.size() == 16 || RegList.size() == 32;
    if (!hasD32() && !(StartsAtD0 && HasValidSize)) {
      return Error(Op.getEndLoc(),
                   "operand must be exactly {d0-d15} (T1) or {d0-d31} (T2)");
    }
  }
  return false;
}
case ARM::t2IT: {
// Encoding is unpredictable if it ever results in a notional 'NV'
// predicate. Since we don't parse 'NV' directly this means an 'AL'
Expand Down Expand Up @@ -8731,6 +8763,32 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}

switch (Inst.getOpcode()) {
case ARM::VLLDM:
case ARM::VLSTM: {
  // In some cases both T1 and T2 are valid, causing tablegen to pick T1
  // instead of T2. Rewrite the instruction to its T2 opcode when the written
  // register list is {d0-d31}; returns true only when a rewrite happened.
  if (Operands.size() != 4)
    return false; // no register list has been provided

  // The register list, a dpr_reglist.
  ARMOperand &ListOp = static_cast<ARMOperand &>(*Operands[3]);
  assert(ListOp.isDPRRegList());
  auto &Regs = ListOp.getRegList();

  // Only a 32-entry list forces the T2 variant.
  if (Regs.size() != 32)
    return false;

  const bool IsLoad = Inst.getOpcode() == ARM::VLLDM;
  const unsigned T2Opcode = IsLoad ? ARM::VLLDM_T2 : ARM::VLSTM_T2;

  // Rebuild the instruction with the T2 opcode and the same four operands.
  MCInst Replacement;
  Replacement.setOpcode(T2Opcode);
  for (unsigned Idx = 0; Idx != 4; ++Idx)
    Replacement.addOperand(Inst.getOperand(Idx));
  Inst = Replacement;
  return true;
}
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
case ARM::LDRBT_POST: {
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,9 @@ DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
uint64_t Address,
const MCDisassembler *Decoder);
static DecodeStatus DecodeLazyLoadStoreMul(MCInst &Inst, unsigned Insn,
uint64_t Address,
const MCDisassembler *Decoder);

#include "ARMGenDisassemblerTables.inc"

Expand Down Expand Up @@ -7030,3 +7033,23 @@ static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,

return DS;
}

/// Custom decoder for the lazy load/store multiple instructions.
///
/// Appends, in order: the base register taken from bits 19-16 of \p Insn, a
/// predicate operand built from ARMCC::AL, and an immediate 0 standing in for
/// the register list. Returns MCDisassembler::Fail when the base register
/// cannot be decoded, otherwise the accumulated decode status.
static DecodeStatus DecodeLazyLoadStoreMul(MCInst &Inst, unsigned Insn,
                                           uint64_t Address,
                                           const MCDisassembler *Decoder) {
  DecodeStatus Result = MCDisassembler::Success;

  // '$Rn' in the assembly: the register holding the memory location to
  // save/load to/from. It is the only argument that is actually encoded.
  const unsigned BaseReg = fieldFromInstruction(Insn, 16, 4);
  const DecodeStatus BaseRegStatus =
      DecodeGPRRegisterClass(Inst, BaseReg, Address, Decoder);
  if (!Check(Result, BaseRegStatus))
    return MCDisassembler::Fail;

  // '$p' in the assembly: an optional predicate.
  DecodePredicateOperand(Inst, ARMCC::AL, Address, Decoder);

  // '$regs' in the assembly: an immediate representing the floating point
  // register list. The value is arbitrary and has no effect.
  Inst.addOperand(MCOperand::createImm(0));

  return Result;
}
Loading

0 comments on commit 634b024

Please sign in to comment.