Skip to content

Commit

Permalink
[LLVM][RVV 0.7.1] Emulate vector register whole load/store, and fix p…
Browse files Browse the repository at this point in the history
…otential instruction selection bugs (ruyisdk#23)

* [LLVM][RVV 0.7.1] Strictly distinguish RVV versions in TableGen files

* [LLVM][RVV 0.7.1] Start emulating register whole load/store

* [LLVM][RVV 0.7.1] All use `XVSE_V`

* [LLVM][RVV 0.7.1] Start expanding whole load/store pseudos

* [LLVM][RVV 0.7.1] Expand whole load

* [LLVM][RVV 0.7.1] Correctly expand whole load!

```
vl<LMUL>re<SEW>.v vd, (rs1)
```

is expanded to

```
csrr    t5, vl
csrr    t6, vtype
vsetvli x0, x0, e<SEW>, m<LMUL>
vle.v   vd, (rs1)
vsetvl  x0, t5, t6
```

* [LLVM][RVV 0.7.1] Correctly expand whole store!

```
vs<LMUL>r.v vs3, (rs1)
```

is expanded to

```
csrr    t5, vl
csrr    t6, vtype
vsetvli x0, x0, e<SEW>, m<LMUL>
vse.v   vs3, (rs1)
vsetvl  x0, t5, t6
```

* [LLVM][RVV 0.7.1] Extract common part

* [LLVM][RVV 0.7.1] Make lowering easier

* [LLVM][RVV 0.7.1] Remove unnecessary changes

* [LLVM][RVV 0.7.1] Test whole load/store for M1 cases
  • Loading branch information
imkiva committed Apr 1, 2024
1 parent 9cdf30e commit b74d592
Show file tree
Hide file tree
Showing 9 changed files with 427 additions and 22 deletions.
16 changes: 8 additions & 8 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -441,25 +441,25 @@ def FeatureStdExtV
"'V' (Vector Extension for Application Processors)",
[FeatureStdExtZvl128b, FeatureStdExtZve64d]>;

def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">,
def HasVInstructions : Predicate<"Subtarget->hasOnlyStdV()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32x),
"'V' (Vector Extension for Application Processors), 'Zve32x' or "
"'Zve64x' (Vector Extensions for Embedded Processors)">;
def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
def HasVInstructionsI64 : Predicate<"Subtarget->hasOnlyStdVI64()">,
AssemblerPredicate<
(any_of FeatureStdExtZve64x),
"'V' (Vector Extension for Application Processors) or 'Zve64x' "
"(Vector Extensions for Embedded Processors)">;
def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
def HasVInstructionsAnyF : Predicate<"Subtarget->hasOnlyStdVAnyF()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32f),
"'V' (Vector Extension for Application Processors), 'Zve32f', "
"'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">;

def HasVInstructionsF64 : Predicate<"Subtarget->hasVInstructionsF64()">;
def HasVInstructionsF64 : Predicate<"Subtarget->hasOnlyStdVF64()">;

def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasVInstructionsFullMultiply()">;
def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasOnlyStdVFullMultiply()">;

def FeatureStdExtZvfbfmin
: SubtargetFeature<"experimental-zvfbfmin", "HasStdExtZvfbfmin", "true",
Expand All @@ -482,7 +482,7 @@ def FeatureStdExtZvfh
"'Zvfh' (Vector Half-Precision Floating-Point)",
[FeatureStdExtZve32f, FeatureStdExtZfhmin]>;

def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">;
def HasVInstructionsF16 : Predicate<"Subtarget->hasOnlyStdVF16()">;

def HasStdExtZfhOrZvfh
: Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZvfh()">,
Expand Down Expand Up @@ -926,13 +926,13 @@ def HasVendorXTHeadVediv : Predicate<"Subtarget->hasVendorXTHeadVediv()">,
"'xtheadvediv' (T-Head Divided Element Extension)">;

// Predicates for reusing instructions/intrinsics in both RVV 1.0 and 0.7
def HasStdVOrXTHeadV : Predicate<"Subtarget->hasVInstructions()">,
def HasStdVOrXTHeadV : Predicate<"Subtarget->hasStdVOrXTHeadV()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32x, FeatureVendorXTHeadV),
"'V' (Vector Extension for Application Processors), 'Zve32x', "
"'Zve64x' (Vector Extensions for Embedded Processors) or"
"'XTHeadV' (Vector Extension for T-Head)">;
def HasStdVOrXTHeadVI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
def HasStdVOrXTHeadVI64 : Predicate<"Subtarget->hasStdVOrXTHeadVI64()">,
AssemblerPredicate<
(any_of FeatureStdExtZve64x, FeatureVendorXTHeadV),
"'V' (Vector Extension for Application Processors), 'Zve64x' "
Expand Down
92 changes: 92 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14480,6 +14480,66 @@ static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
return DoneMBB;
}

// Expand an emulated whole-register load/store pseudo (RVV 0.7 / XTHeadV).
// RVV 0.7 has no vl<LMUL>re<SEW>.v / vs<LMUL>r.v instructions, so the pseudo
// is lowered to a save/set/access/restore sequence inserted before MI:
//   csrr    <SavedVL>, vl
//   csrr    <SavedVType>, vtype
//   vsetvli x0, x0, e<SEW>, m<LMUL>
//   vle.v / vse.v  v<reg>, (rs1)        ; unmasked unit-stride access
//   vsetvl  x0, <SavedVL>, <SavedVType> ; restore caller's vl/vtype
//
// \param MI     The whole load/store pseudo being expanded; erased on return.
// \param BB     Block containing MI; the sequence is inserted before MI.
// \param SEW    Element width (bits) encoded into the temporary vtype.
// \param LMUL   Register-group multiplier encoded into the temporary vtype.
// \param Opcode RISCV::XVLE_V for loads or RISCV::XVSE_V for stores.
// \return The (unchanged) basic block, as custom inserters require.
static MachineBasicBlock *emitXWholeLoadStore(MachineInstr &MI,
                                              MachineBasicBlock *BB,
                                              unsigned SEW, unsigned LMUL,
                                              unsigned Opcode) {
  DebugLoc DL = MI.getDebugLoc();

  auto *TII = BB->getParent()->getSubtarget().getInstrInfo();
  auto *MRI = &BB->getParent()->getRegInfo();

  // Virtual GPRs carrying the caller's vl/vtype across the expansion; the
  // register allocator picks the physical scratch registers later.
  Register SavedVL = MRI->createVirtualRegister(&RISCV::GPRRegClass);
  Register SavedVType = MRI->createVirtualRegister(&RISCV::GPRRegClass);

  // Save vl and vtype.
  // Spec: The assembler pseudoinstruction to read a CSR, `CSRR rd, csr`, is
  // encoded as `CSRRS rd, csr, x0`.
  BuildMI(*BB, MI, DL, TII->get(RISCV::CSRRS), SavedVL)
      .addImm(RISCVSysReg::lookupSysRegByName("VL")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(*BB, MI, DL, TII->get(RISCV::CSRRS), SavedVType)
      .addImm(RISCVSysReg::lookupSysRegByName("VTYPE")->Encoding)
      .addReg(RISCV::X0);

  // Generate `vsetvli x0, x0, e<SEW>, m<LMUL>`; rd=x0/rs1=x0 switches vtype
  // while keeping the current vl.
  // NOTE(review): the third encodeXTHeadVTYPE argument (1) and the implicit
  // VL operand below model XTHeadV vsetvli details -- confirm against the
  // encodeXTHeadVTYPE definition and the XVSETVLI instruction description.
  auto VTypeI = RISCVVType::encodeXTHeadVTYPE(SEW, LMUL, 1);
  BuildMI(*BB, MI, DL, TII->get(RISCV::XVSETVLI))
      .addReg(RISCV::X0, RegState::Define | RegState::Dead)
      .addReg(RISCV::X0)
      .addImm(VTypeI)
      .addReg(RISCV::VL, RegState::Implicit);

  // Generate the unmasked unit-stride `vle.v` or `vse.v`.
  // From GCC: `vl<LMUL>re<SEW>.v vd, (rs)` -> `vle.v vd, (rs), vm`
  // From GCC: `vs<LMUL>r.v vs3, (rs)` -> `vse.v vs3, (rs), vm`
  BuildMI(*BB, MI, DL, TII->get(Opcode))
      .add(MI.getOperand(0)) // vd (load dest) or vs3 (store source)
      .add(MI.getOperand(1)) // rs, the load/store address
      .addReg(RISCV::NoRegister); // vmask, currently no mask

  // Restore vl, vtype with `vsetvl x0, SavedVL, SavedVType`; both temps are
  // dead after this point, hence the Kill flags.
  BuildMI(*BB, MI, DL, TII->get(RISCV::XVSETVL))
      .addReg(RISCV::X0, RegState::Define | RegState::Dead)
      .addReg(SavedVL, RegState::Kill)
      .addReg(SavedVType, RegState::Kill);

  // Erase the pseudoinstruction; its expansion fully replaces it.
  MI.eraseFromParent();
  return BB;
}

/// Custom-inserter entry for the PseudoXVL<LMUL>RE<SEW>_V whole-register
/// load pseudos: delegates to the shared expansion helper, selecting the
/// unit-stride load opcode (XVLE_V).
static MachineBasicBlock *emitXWholeLoad(MachineInstr &LoadMI,
                                         MachineBasicBlock *MBB,
                                         unsigned ElemWidth,
                                         unsigned RegGroupMul) {
  return emitXWholeLoadStore(LoadMI, MBB, ElemWidth, RegGroupMul,
                             RISCV::XVLE_V);
}

/// Custom-inserter entry for the PseudoXVS<LMUL>RE<SEW>_V whole-register
/// store pseudos: delegates to the shared expansion helper, selecting the
/// unit-stride store opcode (XVSE_V).
static MachineBasicBlock *emitXWholeStore(MachineInstr &StoreMI,
                                          MachineBasicBlock *MBB,
                                          unsigned ElemWidth,
                                          unsigned RegGroupMul) {
  return emitXWholeLoadStore(StoreMI, MBB, ElemWidth, RegGroupMul,
                             RISCV::XVSE_V);
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
Expand Down Expand Up @@ -14599,6 +14659,38 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case RISCV::PseudoFROUND_D_INX:
case RISCV::PseudoFROUND_D_IN32X:
return emitFROUND(MI, BB, Subtarget);

#define PseudoXVL_CASE_SEW_LMUL(SEW_val, LMUL_val) \
case RISCV::PseudoXVL##LMUL_val##RE##SEW_val##_V: \
return emitXWholeLoad(MI, BB, SEW_val, LMUL_val);

#define PseudoXVL_CASE_SEW(SEW_val) \
PseudoXVL_CASE_SEW_LMUL(SEW_val, 1); \
PseudoXVL_CASE_SEW_LMUL(SEW_val, 2); \
PseudoXVL_CASE_SEW_LMUL(SEW_val, 4); \
PseudoXVL_CASE_SEW_LMUL(SEW_val, 8);

// Emulated whole load instructions for RVV 0.7
PseudoXVL_CASE_SEW(8);
PseudoXVL_CASE_SEW(16);
PseudoXVL_CASE_SEW(32);
PseudoXVL_CASE_SEW(64);

#define PseudoXVS_CASE_SEW_LMUL(SEW_val, LMUL_val) \
case RISCV::PseudoXVS##LMUL_val##RE##SEW_val##_V: \
return emitXWholeStore(MI, BB, SEW_val, LMUL_val);

#define PseudoXVS_CASE_SEW(SEW_val) \
PseudoXVS_CASE_SEW_LMUL(SEW_val, 1); \
PseudoXVS_CASE_SEW_LMUL(SEW_val, 2); \
PseudoXVS_CASE_SEW_LMUL(SEW_val, 4); \
PseudoXVS_CASE_SEW_LMUL(SEW_val, 8);

// Emulated whole store instructions for RVV 0.7
PseudoXVS_CASE_SEW(8);
PseudoXVS_CASE_SEW(16);
PseudoXVS_CASE_SEW(32);
PseudoXVS_CASE_SEW(64);
}
}

Expand Down
25 changes: 18 additions & 7 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
Register SrcReg = MBBI->getOperand(1).getReg();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();

bool XTHeadV = STI.hasVendorXTHeadV();

bool FoundDef = false;
bool FirstVSetVLI = false;
unsigned FirstSEW = 0;
Expand All @@ -184,7 +186,9 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,

if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
MBBI->getOpcode() == RISCV::PseudoVSETIVLI ||
MBBI->getOpcode() == RISCV::PseudoXVSETVLI ||
MBBI->getOpcode() == RISCV::PseudoXVSETVLIX0) {
// There is a vsetvli between COPY and source define instruction.
// vy = def_vop ... (producing instruction)
// ...
Expand All @@ -195,8 +199,11 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
if (!FirstVSetVLI) {
FirstVSetVLI = true;
unsigned FirstVType = MBBI->getOperand(2).getImm();
RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
FirstSEW = RISCVVType::getSEW(FirstVType);
RISCVII::VLMUL FirstLMul =
XTHeadV ? RISCVVType::getXTHeadVVLMUL(FirstVType)
: RISCVVType::getVLMUL(FirstVType);
FirstSEW = XTHeadV ? RISCVVType::getXTHeadVSEW(FirstVType)
: RISCVVType::getSEW(FirstVType);
// The first encountered vsetvli must have the same lmul as the
// register class of COPY.
if (FirstLMul != LMul)
Expand All @@ -217,21 +224,25 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
unsigned VType = MBBI->getOperand(2).getImm();
// If there is a vsetvli between COPY and the producing instruction.
if (FirstVSetVLI) {
// If SEW is different, return false.
if (RISCVVType::getSEW(VType) != FirstSEW)
// If NewSEW is different, return false.
auto NewSEW = XTHeadV ? RISCVVType::getXTHeadVSEW(VType)
: RISCVVType::getSEW(VType);
if (NewSEW != FirstSEW)
return false;
}

// If the vsetvli is tail undisturbed, keep the whole register move.
if (!RISCVVType::isTailAgnostic(VType))
if (!XTHeadV && !RISCVVType::isTailAgnostic(VType))
return false;

// The checking is conservative. We only have register classes for
// LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
// for fractional LMUL operations. However, we could not use the vsetvli
// lmul for widening operations. The result of widening operation is
// 2 x LMUL.
return LMul == RISCVVType::getVLMUL(VType);
auto NewLMul = XTHeadV ? RISCVVType::getXTHeadVVLMUL(VType)
: RISCVVType::getVLMUL(VType);
return LMul == NewLMul;
} else if (MBBI->isInlineAsm() || MBBI->isCall()) {
return false;
} else if (MBBI->getNumDefs()) {
Expand Down
83 changes: 83 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ defset list<VTypeInfo> AllXVectors = {
}
}

// Compute the predicate list guarding XTHeadV pseudos/patterns for a given
// vector type.  NOTE: every !cond arm currently resolves to the same
// [HasVendorXTHeadV]; the branch structure is kept as a scaffold so
// per-type predicates (F16/F32/F64/I64) can be slotted in later.
class GetXVTypePredicates<VTypeInfo vti> {
  // TODO: distinguish different types (like F16, F32, F64, AnyF)? Is it needed?
  list<Predicate> Predicates = !cond(!eq(vti.Scalar, f16) : [HasVendorXTHeadV],
                                     !eq(vti.Scalar, f32) : [HasVendorXTHeadV],
                                     !eq(vti.Scalar, f64) : [HasVendorXTHeadV],
                                     !eq(vti.SEW, 64) : [HasVendorXTHeadV],
                                     true : [HasVendorXTHeadV]);
}

class XTHeadVVL<bit M, bit ST, bit U, bit E, bits<3> ME, bits<3> S, bits<3> L> {
bits<1> Masked = M;
bits<1> Strided = ST;
Expand Down Expand Up @@ -419,6 +428,80 @@ let Predicates = [HasVendorXTHeadV] in {
defm PseudoXVS : XVPseudoSStore;
} // Predicates = [HasVendorXTHeadV]

//===----------------------------------------------------------------------===//
// 7. Vector Loads and Stores
// for emulating Vector Load/Store Whole Register Instructions in RVV 1.0
//===----------------------------------------------------------------------===//
// Pseudo for an emulated whole-register load: one output vector register
// (group) $vd and one base-address input $rs1 (zero-offset memory operand).
// Expanded by the custom inserter in RISCVISelLowering.cpp.
class VPseudoWholeLoad<Instruction instr, LMULInfo m, RegisterClass VRC>
  : VPseudo<instr, m, (outs VRC:$vd), (ins GPRMemZeroOffset:$rs1)> {
}

// Instantiate one whole-register load pseudo per element width, producing
// records named <prefix>E<eew>_V (e.g. PseudoXVL1RE8_V).  `nf` is the number
// of registers in the group minus one, so WriteVLD<nf+1>R is the matching
// whole-register-load scheduling resource.
multiclass XVPseudoWholeLoadN<bits<3> nf, LMULInfo m, RegisterClass VRC> {
  // The scheduling class depends only on nf, not on the element width.
  defvar schedWrite = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
  foreach eew = [8, 16, 32, 64] in {
    def E # eew # _V : VPseudoWholeLoad<XVLE_V, m, VRC>,
                       Sched<[schedWrite, ReadVLDX]>;
  }
}

// Pseudo for an emulated whole-register store: one vector source register
// (group) $vs3 and one base-address input $rs1 (zero-offset memory operand).
// Expanded by the custom inserter in RISCVISelLowering.cpp.
class VPseudoWholeStore<Instruction instr, LMULInfo m, RegisterClass VRC>
  : VPseudo<instr, m, (outs), (ins VRC:$vs3, GPRMemZeroOffset:$rs1)> {
}

// Instantiate one whole-register store pseudo per element width, producing
// records named <prefix>E<eew>_V (e.g. PseudoXVS1RE8_V).  `nf` is the number
// of registers in the group minus one, selecting the WriteVST<nf+1>R /
// ReadVST<nf+1>R scheduling resources.
multiclass XVPseudoWholeStoreN<bits<3> nf, LMULInfo m, RegisterClass VRC> {
  // The scheduling classes depend only on nf, not on the element width.
  defvar schedWrite = !cast<SchedWrite>("WriteVST" # !add(nf, 1) # "R");
  defvar schedRead = !cast<SchedRead>("ReadVST" # !add(nf, 1) # "R");
  foreach eew = [8, 16, 32, 64] in {
    def E # eew # _V : VPseudoWholeStore<XVSE_V, m, VRC>,
                       Sched<[schedWrite, schedRead, ReadVSTX]>;
  }
}

let Predicates = [HasVendorXTHeadV] in {
  // Whole register load.
  // First template argument is the register count minus one (0/1/3/7 for
  // LMUL 1/2/4/8); it only selects scheduling resources.  The pseudos are
  // codegen-only and expanded via EmitInstrWithCustomInserter.
  let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1, usesCustomInserter = 1 in {
    defm PseudoXVL1R : XVPseudoWholeLoadN<0, V_M1, VR>;
    defm PseudoXVL2R : XVPseudoWholeLoadN<1, V_M2, VRM2>;
    defm PseudoXVL4R : XVPseudoWholeLoadN<3, V_M4, VRM4>;
    defm PseudoXVL8R : XVPseudoWholeLoadN<7, V_M8, VRM8>;
  }
  // Whole register store (same register-count-minus-one convention).
  let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
    defm PseudoXVS1R : XVPseudoWholeStoreN<0, V_M1, VR>;
    defm PseudoXVS2R : XVPseudoWholeStoreN<1, V_M2, VRM2>;
    defm PseudoXVS4R : XVPseudoWholeStoreN<3, V_M4, VRM4>;
    defm PseudoXVS8R : XVPseudoWholeStoreN<7, V_M8, VRM8>;
  }
} // Predicates = [HasVendorXTHeadV]

// Select plain (unmasked, unit-stride) whole-register load/store SDNodes to
// the emulated whole-register pseudos.  `sew` defaults to 2^log2sew so the
// element width can be spliced into the pseudo name; !substr(vlmul.MX, 1)
// strips the leading 'M' from the LMUL name ("M1" -> "1"), yielding e.g.
// PseudoXVL1RE8_V / PseudoXVS1RE8_V.
multiclass XVPatUSLoadStoreWholeVRSDNode<ValueType type,
                                         int log2sew,
                                         LMULInfo vlmul,
                                         VReg reg_class,
                                         int sew = !shl(1, log2sew)> {
  defvar load_instr =
    !cast<Instruction>("PseudoXVL"#!substr(vlmul.MX, 1)#"RE"#sew#"_V");
  defvar store_instr =
    !cast<Instruction>("PseudoXVS"#!substr(vlmul.MX, 1)#"RE"#sew#"_V");

  // Load
  def : Pat<(type (load GPR:$rs1)),
            (load_instr GPR:$rs1)>;
  // Store
  def : Pat<(store type:$rs2, GPR:$rs1),
            (store_instr reg_class:$rs2, GPR:$rs1)>;
}
// Attach the whole-register load/store patterns to every integer vector
// type: the LMUL=1 types listed explicitly, plus the grouped (LMUL=2/4/8)
// integer vectors.  NOTE(review): floating-point vector types are not
// covered here -- presumably handled elsewhere or deliberately deferred;
// confirm before relying on FP whole-register selection.
foreach vti = [XVI8M1, XVI16M1, XVI32M1, XVI64M1] in
  let Predicates = GetXVTypePredicates<vti>.Predicates in
  defm : XVPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
                                       vti.RegClass>;
foreach vti = GroupIntegerXVectors in
  let Predicates = GetXVTypePredicates<vti>.Predicates in
  defm : XVPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
                                       vti.RegClass>;

//===----------------------------------------------------------------------===//
// 8. Vector AMO Operations
//===----------------------------------------------------------------------===//
Expand Down
34 changes: 27 additions & 7 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,20 +165,40 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool hasMacroFusion() const { return hasLUIADDIFusion(); }

// Vector codegen related methods.
// If a SubTarget has either standard V or XTHeadV:
bool hasVInstructions() const {
return HasStdExtZve32x || HasVendorXTHeadV;
return hasOnlyStdV() || hasVendorXTHeadV();
}
bool hasVInstructionsI64() const {
return HasStdExtZve64x || HasVendorXTHeadV;
return hasOnlyStdVI64() || hasVendorXTHeadV();
}
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
bool hasVInstructionsF16() const { return hasOnlyStdVF16(); }
bool hasVInstructionsF32() const { return hasOnlyStdVF32(); }
bool hasVInstructionsF64() const { return hasOnlyStdVF64(); }
bool hasVInstructionsAnyF() const { return hasOnlyStdVAnyF(); }
bool hasVInstructionsFullMultiply() const { return hasOnlyStdV() || hasVendorXTHeadV(); }
// If a SubTarget only has the standard V extension:
bool hasOnlyStdV() const {
return HasStdExtZve32x;
}
bool hasOnlyStdVI64() const {
return HasStdExtZve64x;
}
bool hasOnlyStdVF16() const { return HasStdExtZvfh; }
// FIXME: Consider Zfinx in the future
bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
bool hasOnlyStdVF32() const { return HasStdExtZve32f && HasStdExtF; }
// FIXME: Consider Zdinx in the future
bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; }
bool hasOnlyStdVF64() const { return HasStdExtZve64d && HasStdExtD; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
bool hasOnlyStdVAnyF() const { return hasOnlyStdVF32(); }
bool hasOnlyStdVFullMultiply() const { return HasStdExtV; }
// XTHeadV codegen related methods.
bool hasStdVOrXTHeadV() const {
return hasVInstructions() || hasVendorXTHeadV();
}
bool hasStdVOrXTHeadVI64() const {
return hasVInstructionsI64() || hasVendorXTHeadV();
}
unsigned getMaxInterleaveFactor() const {
return hasVInstructions() ? MaxInterleaveFactor : 1;
}
Expand Down
Loading

0 comments on commit b74d592

Please sign in to comment.