Skip to content

Commit

Permalink
[LLVM][RVV 0.7.1] Emulate vector register whole load/store, and fix p…
Browse files Browse the repository at this point in the history
…otential instruction selection bugs (ruyisdk#23)

* [LLVM][RVV 0.7.1] Strictly distinguish RVV versions in TableGen files

* [LLVM][RVV 0.7.1] Start emulating register whole load/store

* [LLVM][RVV 0.7.1] All use `XVSE_V`

* [LLVM][RVV 0.7.1] Start expanding whole load/store pseudos

* [LLVM][RVV 0.7.1] Expand whole load

* [LLVM][RVV 0.7.1] Correctly expand whole load!

```
vl<LMUL>re<SEW>.v vd, (rs1)
```

is expanded to

```
csrr    t5, vl
csrr    t6, vtype
vsetvli x0, x0, e<SEW>, m<LMUL>
vle.v   vd, (rs1)
vsetvl  x0, t5, t6
```

* [LLVM][RVV 0.7.1] Correctly expand whole store!

```
vs<LMUL>r.v vs3, (rs1)
```

is expanded to

```
csrr    t5, vl
csrr    t6, vtype
vsetvli x0, x0, e<SEW>, m<LMUL>
vse.v   vs3, (rs1)
vsetvl  x0, t5, t6
```

* [LLVM][RVV 0.7.1] Extract common part

* [LLVM][RVV 0.7.1] Make lowering easier

* [LLVM][RVV 0.7.1] Remove unnecessary changes

* [LLVM][RVV 0.7.1] Test whole load/store for M1 cases
  • Loading branch information
imkiva committed Apr 1, 2024
1 parent 9cdf30e commit b74d592
Show file tree
Hide file tree
Showing 9 changed files with 427 additions and 22 deletions.
16 changes: 8 additions & 8 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -441,25 +441,25 @@ def FeatureStdExtV
"'V' (Vector Extension for Application Processors)",
[FeatureStdExtZvl128b, FeatureStdExtZve64d]>;

def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">,
def HasVInstructions : Predicate<"Subtarget->hasOnlyStdV()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32x),
"'V' (Vector Extension for Application Processors), 'Zve32x' or "
"'Zve64x' (Vector Extensions for Embedded Processors)">;
def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
def HasVInstructionsI64 : Predicate<"Subtarget->hasOnlyStdVI64()">,
AssemblerPredicate<
(any_of FeatureStdExtZve64x),
"'V' (Vector Extension for Application Processors) or 'Zve64x' "
"(Vector Extensions for Embedded Processors)">;
def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
def HasVInstructionsAnyF : Predicate<"Subtarget->hasOnlyStdVAnyF()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32f),
"'V' (Vector Extension for Application Processors), 'Zve32f', "
"'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">;

def HasVInstructionsF64 : Predicate<"Subtarget->hasVInstructionsF64()">;
def HasVInstructionsF64 : Predicate<"Subtarget->hasOnlyStdVF64()">;

def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasVInstructionsFullMultiply()">;
def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasOnlyStdVFullMultiply()">;

def FeatureStdExtZvfbfmin
: SubtargetFeature<"experimental-zvfbfmin", "HasStdExtZvfbfmin", "true",
Expand All @@ -482,7 +482,7 @@ def FeatureStdExtZvfh
"'Zvfh' (Vector Half-Precision Floating-Point)",
[FeatureStdExtZve32f, FeatureStdExtZfhmin]>;

def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">;
def HasVInstructionsF16 : Predicate<"Subtarget->hasOnlyStdVF16()">;

def HasStdExtZfhOrZvfh
: Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZvfh()">,
Expand Down Expand Up @@ -926,13 +926,13 @@ def HasVendorXTHeadVediv : Predicate<"Subtarget->hasVendorXTHeadVediv()">,
"'xtheadvediv' (T-Head Divided Element Extension)">;

// Predicates for reusing instructions/intrinsics in both RVV 1.0 and 0.7
def HasStdVOrXTHeadV : Predicate<"Subtarget->hasVInstructions()">,
def HasStdVOrXTHeadV : Predicate<"Subtarget->hasStdVOrXTHeadV()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32x, FeatureVendorXTHeadV),
"'V' (Vector Extension for Application Processors), 'Zve32x', "
"'Zve64x' (Vector Extensions for Embedded Processors) or"
"'XTHeadV' (Vector Extension for T-Head)">;
def HasStdVOrXTHeadVI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
def HasStdVOrXTHeadVI64 : Predicate<"Subtarget->hasStdVOrXTHeadVI64()">,
AssemblerPredicate<
(any_of FeatureStdExtZve64x, FeatureVendorXTHeadV),
"'V' (Vector Extension for Application Processors), 'Zve64x' "
Expand Down
92 changes: 92 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14480,6 +14480,66 @@ static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
return DoneMBB;
}

// Expand an emulated whole-register load/store pseudo (RVV 0.7 / XTHeadV).
// RVV 0.7 has no vl<LMUL>re<SEW>.v / vs<LMUL>r.v instructions, so the pseudo
// is lowered to a save/set/access/restore sequence inserted before MI:
//   csrr    <SavedVL>, vl
//   csrr    <SavedVType>, vtype
//   vsetvli x0, x0, e<SEW>, m<LMUL>
//   vle.v / vse.v  v<reg>, (rs1)        ; unmasked unit-stride access
//   vsetvl  x0, <SavedVL>, <SavedVType> ; restore caller's vl/vtype
//
// \param MI     The whole load/store pseudo being expanded; erased on return.
// \param BB     Block containing MI; the sequence is inserted before MI.
// \param SEW    Element width (bits) encoded into the temporary vtype.
// \param LMUL   Register-group multiplier encoded into the temporary vtype.
// \param Opcode RISCV::XVLE_V for loads or RISCV::XVSE_V for stores.
// \return The (unchanged) basic block, as custom inserters require.
static MachineBasicBlock *emitXWholeLoadStore(MachineInstr &MI,
                                              MachineBasicBlock *BB,
                                              unsigned SEW, unsigned LMUL,
                                              unsigned Opcode) {
  DebugLoc DL = MI.getDebugLoc();

  auto *TII = BB->getParent()->getSubtarget().getInstrInfo();
  auto *MRI = &BB->getParent()->getRegInfo();

  // Virtual GPRs carrying the caller's vl/vtype across the expansion; the
  // register allocator picks the physical scratch registers later.
  Register SavedVL = MRI->createVirtualRegister(&RISCV::GPRRegClass);
  Register SavedVType = MRI->createVirtualRegister(&RISCV::GPRRegClass);

  // Save vl and vtype.
  // Spec: The assembler pseudoinstruction to read a CSR, `CSRR rd, csr`, is
  // encoded as `CSRRS rd, csr, x0`.
  BuildMI(*BB, MI, DL, TII->get(RISCV::CSRRS), SavedVL)
      .addImm(RISCVSysReg::lookupSysRegByName("VL")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(*BB, MI, DL, TII->get(RISCV::CSRRS), SavedVType)
      .addImm(RISCVSysReg::lookupSysRegByName("VTYPE")->Encoding)
      .addReg(RISCV::X0);

  // Generate `vsetvli x0, x0, e<SEW>, m<LMUL>`; rd=x0/rs1=x0 switches vtype
  // while keeping the current vl.
  // NOTE(review): the third encodeXTHeadVTYPE argument (1) and the implicit
  // VL operand below model XTHeadV vsetvli details -- confirm against the
  // encodeXTHeadVTYPE definition and the XVSETVLI instruction description.
  auto VTypeI = RISCVVType::encodeXTHeadVTYPE(SEW, LMUL, 1);
  BuildMI(*BB, MI, DL, TII->get(RISCV::XVSETVLI))
      .addReg(RISCV::X0, RegState::Define | RegState::Dead)
      .addReg(RISCV::X0)
      .addImm(VTypeI)
      .addReg(RISCV::VL, RegState::Implicit);

  // Generate the unmasked unit-stride `vle.v` or `vse.v`.
  // From GCC: `vl<LMUL>re<SEW>.v vd, (rs)` -> `vle.v vd, (rs), vm`
  // From GCC: `vs<LMUL>r.v vs3, (rs)` -> `vse.v vs3, (rs), vm`
  BuildMI(*BB, MI, DL, TII->get(Opcode))
      .add(MI.getOperand(0)) // vd (load dest) or vs3 (store source)
      .add(MI.getOperand(1)) // rs, the load/store address
      .addReg(RISCV::NoRegister); // vmask, currently no mask

  // Restore vl, vtype with `vsetvl x0, SavedVL, SavedVType`; both temps are
  // dead after this point, hence the Kill flags.
  BuildMI(*BB, MI, DL, TII->get(RISCV::XVSETVL))
      .addReg(RISCV::X0, RegState::Define | RegState::Dead)
      .addReg(SavedVL, RegState::Kill)
      .addReg(SavedVType, RegState::Kill);

  // Erase the pseudoinstruction; its expansion fully replaces it.
  MI.eraseFromParent();
  return BB;
}

/// Custom-inserter entry for the PseudoXVL<LMUL>RE<SEW>_V whole-register
/// load pseudos: delegates to the shared expansion helper, selecting the
/// unit-stride load opcode (XVLE_V).
static MachineBasicBlock *emitXWholeLoad(MachineInstr &LoadMI,
                                         MachineBasicBlock *MBB,
                                         unsigned ElemWidth,
                                         unsigned RegGroupMul) {
  return emitXWholeLoadStore(LoadMI, MBB, ElemWidth, RegGroupMul,
                             RISCV::XVLE_V);
}

/// Custom-inserter entry for the PseudoXVS<LMUL>RE<SEW>_V whole-register
/// store pseudos: delegates to the shared expansion helper, selecting the
/// unit-stride store opcode (XVSE_V).
static MachineBasicBlock *emitXWholeStore(MachineInstr &StoreMI,
                                          MachineBasicBlock *MBB,
                                          unsigned ElemWidth,
                                          unsigned RegGroupMul) {
  return emitXWholeLoadStore(StoreMI, MBB, ElemWidth, RegGroupMul,
                             RISCV::XVSE_V);
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
Expand Down Expand Up @@ -14599,6 +14659,38 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case RISCV::PseudoFROUND_D_INX:
case RISCV::PseudoFROUND_D_IN32X:
return emitFROUND(MI, BB, Subtarget);

#define PseudoXVL_CASE_SEW_LMUL(SEW_val, LMUL_val) \
case RISCV::PseudoXVL##LMUL_val##RE##SEW_val##_V: \
return emitXWholeLoad(MI, BB, SEW_val, LMUL_val);

#define PseudoXVL_CASE_SEW(SEW_val) \
PseudoXVL_CASE_SEW_LMUL(SEW_val, 1); \
PseudoXVL_CASE_SEW_LMUL(SEW_val, 2); \
PseudoXVL_CASE_SEW_LMUL(SEW_val, 4); \
PseudoXVL_CASE_SEW_LMUL(SEW_val, 8);

// Emulated whole load instructions for RVV 0.7
PseudoXVL_CASE_SEW(8);
PseudoXVL_CASE_SEW(16);
PseudoXVL_CASE_SEW(32);
PseudoXVL_CASE_SEW(64);

#define PseudoXVS_CASE_SEW_LMUL(SEW_val, LMUL_val) \
case RISCV::PseudoXVS##LMUL_val##RE##SEW_val##_V: \
return emitXWholeStore(MI, BB, SEW_val, LMUL_val);

#define PseudoXVS_CASE_SEW(SEW_val) \
PseudoXVS_CASE_SEW_LMUL(SEW_val, 1); \
PseudoXVS_CASE_SEW_LMUL(SEW_val, 2); \
PseudoXVS_CASE_SEW_LMUL(SEW_val, 4); \
PseudoXVS_CASE_SEW_LMUL(SEW_val, 8);

// Emulated whole store instructions for RVV 0.7
PseudoXVS_CASE_SEW(8);
PseudoXVS_CASE_SEW(16);
PseudoXVS_CASE_SEW(32);
PseudoXVS_CASE_SEW(64);
}
}

Expand Down
25 changes: 18 additions & 7 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
Register SrcReg = MBBI->getOperand(1).getReg();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();

bool XTHeadV = STI.hasVendorXTHeadV();

bool FoundDef = false;
bool FirstVSetVLI = false;
unsigned FirstSEW = 0;
Expand All @@ -184,7 +186,9 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,

if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
MBBI->getOpcode() == RISCV::PseudoVSETIVLI ||
MBBI->getOpcode() == RISCV::PseudoXVSETVLI ||
MBBI->getOpcode() == RISCV::PseudoXVSETVLIX0) {
// There is a vsetvli between COPY and source define instruction.
// vy = def_vop ... (producing instruction)
// ...
Expand All @@ -195,8 +199,11 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
if (!FirstVSetVLI) {
FirstVSetVLI = true;
unsigned FirstVType = MBBI->getOperand(2).getImm();
RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
FirstSEW = RISCVVType::getSEW(FirstVType);
RISCVII::VLMUL FirstLMul =
XTHeadV ? RISCVVType::getXTHeadVVLMUL(FirstVType)
: RISCVVType::getVLMUL(FirstVType);
FirstSEW = XTHeadV ? RISCVVType::getXTHeadVSEW(FirstVType)
: RISCVVType::getSEW(FirstVType);
// The first encountered vsetvli must have the same lmul as the
// register class of COPY.
if (FirstLMul != LMul)
Expand All @@ -217,21 +224,25 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
unsigned VType = MBBI->getOperand(2).getImm();
// If there is a vsetvli between COPY and the producing instruction.
if (FirstVSetVLI) {
// If SEW is different, return false.
if (RISCVVType::getSEW(VType) != FirstSEW)
// If NewSEW is different, return false.
auto NewSEW = XTHeadV ? RISCVVType::getXTHeadVSEW(VType)
: RISCVVType::getSEW(VType);
if (NewSEW != FirstSEW)
return false;
}

// If the vsetvli is tail undisturbed, keep the whole register move.
if (!RISCVVType::isTailAgnostic(VType))
if (!XTHeadV && !RISCVVType::isTailAgnostic(VType))
return false;

// The checking is conservative. We only have register classes for
// LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
// for fractional LMUL operations. However, we could not use the vsetvli
// lmul for widening operations. The result of widening operation is
// 2 x LMUL.
return LMul == RISCVVType::getVLMUL(VType);
auto NewLMul = XTHeadV ? RISCVVType::getXTHeadVVLMUL(VType)
: RISCVVType::getVLMUL(VType);
return LMul == NewLMul;
} else if (MBBI->isInlineAsm() || MBBI->isCall()) {
return false;
} else if (MBBI->getNumDefs()) {
Expand Down
83 changes: 83 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ defset list<VTypeInfo> AllXVectors = {
}
}

// Compute the predicate list guarding XTHeadV pseudos/patterns for a given
// vector type.  NOTE: every !cond arm currently resolves to the same
// [HasVendorXTHeadV]; the branch structure is kept as a scaffold so
// per-type predicates (F16/F32/F64/I64) can be slotted in later.
class GetXVTypePredicates<VTypeInfo vti> {
  // TODO: distinguish different types (like F16, F32, F64, AnyF)? Is it needed?
  list<Predicate> Predicates = !cond(!eq(vti.Scalar, f16) : [HasVendorXTHeadV],
                                     !eq(vti.Scalar, f32) : [HasVendorXTHeadV],
                                     !eq(vti.Scalar, f64) : [HasVendorXTHeadV],
                                     !eq(vti.SEW, 64) : [HasVendorXTHeadV],
                                     true : [HasVendorXTHeadV]);
}

class XTHeadVVL<bit M, bit ST, bit U, bit E, bits<3> ME, bits<3> S, bits<3> L> {
bits<1> Masked = M;
bits<1> Strided = ST;
Expand Down Expand Up @@ -419,6 +428,80 @@ let Predicates = [HasVendorXTHeadV] in {
defm PseudoXVS : XVPseudoSStore;
} // Predicates = [HasVendorXTHeadV]

//===----------------------------------------------------------------------===//
// 7. Vector Loads and Stores
// for emulating Vector Load/Store Whole Register Instructions in RVV 1.0
//===----------------------------------------------------------------------===//
// Pseudo for an emulated whole-register load: one output vector register
// (group) $vd and one base-address input $rs1 (zero-offset memory operand).
// Expanded by the custom inserter in RISCVISelLowering.cpp.
class VPseudoWholeLoad<Instruction instr, LMULInfo m, RegisterClass VRC>
  : VPseudo<instr, m, (outs VRC:$vd), (ins GPRMemZeroOffset:$rs1)> {
}

// Instantiate one whole-register load pseudo per element width, producing
// records named <prefix>E<eew>_V (e.g. PseudoXVL1RE8_V).  `nf` is the number
// of registers in the group minus one, so WriteVLD<nf+1>R is the matching
// whole-register-load scheduling resource.
multiclass XVPseudoWholeLoadN<bits<3> nf, LMULInfo m, RegisterClass VRC> {
  // The scheduling class depends only on nf, not on the element width.
  defvar schedWrite = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
  foreach eew = [8, 16, 32, 64] in {
    def E # eew # _V : VPseudoWholeLoad<XVLE_V, m, VRC>,
                       Sched<[schedWrite, ReadVLDX]>;
  }
}

// Pseudo for an emulated whole-register store: one vector source register
// (group) $vs3 and one base-address input $rs1 (zero-offset memory operand).
// Expanded by the custom inserter in RISCVISelLowering.cpp.
class VPseudoWholeStore<Instruction instr, LMULInfo m, RegisterClass VRC>
  : VPseudo<instr, m, (outs), (ins VRC:$vs3, GPRMemZeroOffset:$rs1)> {
}

// Instantiate one whole-register store pseudo per element width, producing
// records named <prefix>E<eew>_V (e.g. PseudoXVS1RE8_V).  `nf` is the number
// of registers in the group minus one, selecting the WriteVST<nf+1>R /
// ReadVST<nf+1>R scheduling resources.
multiclass XVPseudoWholeStoreN<bits<3> nf, LMULInfo m, RegisterClass VRC> {
  // The scheduling classes depend only on nf, not on the element width.
  defvar schedWrite = !cast<SchedWrite>("WriteVST" # !add(nf, 1) # "R");
  defvar schedRead = !cast<SchedRead>("ReadVST" # !add(nf, 1) # "R");
  foreach eew = [8, 16, 32, 64] in {
    def E # eew # _V : VPseudoWholeStore<XVSE_V, m, VRC>,
                       Sched<[schedWrite, schedRead, ReadVSTX]>;
  }
}

let Predicates = [HasVendorXTHeadV] in {
  // Whole register load.
  // First template argument is the register count minus one (0/1/3/7 for
  // LMUL 1/2/4/8); it only selects scheduling resources.  The pseudos are
  // codegen-only and expanded via EmitInstrWithCustomInserter.
  let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1, usesCustomInserter = 1 in {
    defm PseudoXVL1R : XVPseudoWholeLoadN<0, V_M1, VR>;
    defm PseudoXVL2R : XVPseudoWholeLoadN<1, V_M2, VRM2>;
    defm PseudoXVL4R : XVPseudoWholeLoadN<3, V_M4, VRM4>;
    defm PseudoXVL8R : XVPseudoWholeLoadN<7, V_M8, VRM8>;
  }
  // Whole register store (same register-count-minus-one convention).
  let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
    defm PseudoXVS1R : XVPseudoWholeStoreN<0, V_M1, VR>;
    defm PseudoXVS2R : XVPseudoWholeStoreN<1, V_M2, VRM2>;
    defm PseudoXVS4R : XVPseudoWholeStoreN<3, V_M4, VRM4>;
    defm PseudoXVS8R : XVPseudoWholeStoreN<7, V_M8, VRM8>;
  }
} // Predicates = [HasVendorXTHeadV]

// Select plain (unmasked, unit-stride) whole-register load/store SDNodes to
// the emulated whole-register pseudos.  `sew` defaults to 2^log2sew so the
// element width can be spliced into the pseudo name; !substr(vlmul.MX, 1)
// strips the leading 'M' from the LMUL name ("M1" -> "1"), yielding e.g.
// PseudoXVL1RE8_V / PseudoXVS1RE8_V.
multiclass XVPatUSLoadStoreWholeVRSDNode<ValueType type,
                                         int log2sew,
                                         LMULInfo vlmul,
                                         VReg reg_class,
                                         int sew = !shl(1, log2sew)> {
  defvar load_instr =
    !cast<Instruction>("PseudoXVL"#!substr(vlmul.MX, 1)#"RE"#sew#"_V");
  defvar store_instr =
    !cast<Instruction>("PseudoXVS"#!substr(vlmul.MX, 1)#"RE"#sew#"_V");

  // Load
  def : Pat<(type (load GPR:$rs1)),
            (load_instr GPR:$rs1)>;
  // Store
  def : Pat<(store type:$rs2, GPR:$rs1),
            (store_instr reg_class:$rs2, GPR:$rs1)>;
}
// Attach the whole-register load/store patterns to every integer vector
// type: the LMUL=1 types listed explicitly, plus the grouped (LMUL=2/4/8)
// integer vectors.  NOTE(review): floating-point vector types are not
// covered here -- presumably handled elsewhere or deliberately deferred;
// confirm before relying on FP whole-register selection.
foreach vti = [XVI8M1, XVI16M1, XVI32M1, XVI64M1] in
  let Predicates = GetXVTypePredicates<vti>.Predicates in
  defm : XVPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
                                       vti.RegClass>;
foreach vti = GroupIntegerXVectors in
  let Predicates = GetXVTypePredicates<vti>.Predicates in
  defm : XVPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
                                       vti.RegClass>;

//===----------------------------------------------------------------------===//
// 8. Vector AMO Operations
//===----------------------------------------------------------------------===//
Expand Down
34 changes: 27 additions & 7 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,20 +165,40 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool hasMacroFusion() const { return hasLUIADDIFusion(); }

// Vector codegen related methods.
// If a SubTarget has either standard V or XTHeadV:
bool hasVInstructions() const {
return HasStdExtZve32x || HasVendorXTHeadV;
return hasOnlyStdV() || hasVendorXTHeadV();
}
bool hasVInstructionsI64() const {
return HasStdExtZve64x || HasVendorXTHeadV;
return hasOnlyStdVI64() || hasVendorXTHeadV();
}
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
bool hasVInstructionsF16() const { return hasOnlyStdVF16(); }
bool hasVInstructionsF32() const { return hasOnlyStdVF32(); }
bool hasVInstructionsF64() const { return hasOnlyStdVF64(); }
bool hasVInstructionsAnyF() const { return hasOnlyStdVAnyF(); }
bool hasVInstructionsFullMultiply() const { return hasOnlyStdV() || hasVendorXTHeadV(); }
// If a SubTarget only has the standard V extension:
bool hasOnlyStdV() const {
return HasStdExtZve32x;
}
bool hasOnlyStdVI64() const {
return HasStdExtZve64x;
}
bool hasOnlyStdVF16() const { return HasStdExtZvfh; }
// FIXME: Consider Zfinx in the future
bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
bool hasOnlyStdVF32() const { return HasStdExtZve32f && HasStdExtF; }
// FIXME: Consider Zdinx in the future
bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; }
bool hasOnlyStdVF64() const { return HasStdExtZve64d && HasStdExtD; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
bool hasOnlyStdVAnyF() const { return hasOnlyStdVF32(); }
bool hasOnlyStdVFullMultiply() const { return HasStdExtV; }
// XTHeadV codegen related methods.
bool hasStdVOrXTHeadV() const {
return hasVInstructions() || hasVendorXTHeadV();
}
bool hasStdVOrXTHeadVI64() const {
return hasVInstructionsI64() || hasVendorXTHeadV();
}
unsigned getMaxInterleaveFactor() const {
return hasVInstructions() ? MaxInterleaveFactor : 1;
}
Expand Down
Loading

0 comments on commit b74d592

Please sign in to comment.