From 34b4bcffe9c7943af6a465c11eb1d987199f96cb Mon Sep 17 00:00:00 2001
From: Liao Chunyu
Date: Fri, 29 Dec 2023 07:28:39 -0500
Subject: [PATCH] [RISCV] Codegen support for XCVmem extension

Add codegen patterns for all post-increment loads/stores and
register-register loads/stores of the XCVmem extension.

Spec: https://github.com/openhwgroup/cv32e40p/blob/master/docs/source/instruction_set_extensions.rst
---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp |   2 +-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |  74 +++++
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h     |   2 +
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  30 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td    |  45 ++-
 llvm/test/CodeGen/RISCV/xcvmem.ll             | 295 ++++++++++++++++++
 6 files changed, 446 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/xcvmem.ll

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index d92998ced91eff..5906a2cdb3bfa1 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1245,7 +1245,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
   }
 
   void addRegRegOperands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
+    assert(N == 2 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(RegReg.Reg1));
     Inst.addOperand(MCOperand::createReg(RegReg.Reg2));
   }
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d965dd4fc9a951..c145dcbc401c62 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1527,6 +1527,67 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
   case ISD::LOAD: {
     if (tryIndexedLoad(Node))
       return;
+
+    if (Subtarget->hasVendorXCVmem()) {
+      // We match post-incrementing loads here.
+      LoadSDNode *Load = cast<LoadSDNode>(Node);
+      if (Load->getAddressingMode() != ISD::POST_INC)
+        break;
+
+      SDValue Chain = Node->getOperand(0);
+      SDValue Base = Node->getOperand(1);
+      SDValue Offset = Node->getOperand(2);
+
+      bool Simm12 = false;
+      bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
+
+      if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
+        int ConstantVal = ConstantOffset->getSExtValue();
+        Simm12 = isInt<12>(ConstantVal);
+        if (Simm12)
+          Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
+                                             Offset.getValueType());
+      }
+
+      unsigned Opcode = 0;
+      switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
+      case MVT::i8:
+        if (Simm12 && SignExtend)
+          Opcode = RISCV::CV_LB_ri_inc;
+        else if (Simm12 && !SignExtend)
+          Opcode = RISCV::CV_LBU_ri_inc;
+        else if (!Simm12 && SignExtend)
+          Opcode = RISCV::CV_LB_rr_inc;
+        else
+          Opcode = RISCV::CV_LBU_rr_inc;
+        break;
+      case MVT::i16:
+        if (Simm12 && SignExtend)
+          Opcode = RISCV::CV_LH_ri_inc;
+        else if (Simm12 && !SignExtend)
+          Opcode = RISCV::CV_LHU_ri_inc;
+        else if (!Simm12 && SignExtend)
+          Opcode = RISCV::CV_LH_rr_inc;
+        else
+          Opcode = RISCV::CV_LHU_rr_inc;
+        break;
+      case MVT::i32:
+        if (Simm12)
+          Opcode = RISCV::CV_LW_ri_inc;
+        else
+          Opcode = RISCV::CV_LW_rr_inc;
+        break;
+      default:
+        break;
+      }
+      if (!Opcode)
+        break;
+
+      ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
+                                               Chain.getSimpleValueType(), Base,
+                                               Offset, Chain));
+      return;
+    }
     break;
   }
   case ISD::INTRINSIC_WO_CHAIN: {
@@ -2669,6 +2730,19 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
   return true;
 }
 
+bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
+                                         SDValue &Offset) {
+  if (Addr.getOpcode() != ISD::ADD)
+    return false;
+
+  if (isa<ConstantSDNode>(Addr.getOperand(1)))
+    return false;
+
+  Base = Addr.getOperand(1);
+  Offset = Addr.getOperand(0);
+  return true;
+}
+
 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                         SDValue &ShAmt) {
   ShAmt = N;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index ece04dd7f4b752..b496af66504319 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -80,6 +80,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
     return false;
   }
 
+  bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset);
+
   bool tryShrinkShlLogicImm(SDNode *Node);
   bool trySignedBitfieldExtract(SDNode *Node);
   bool tryIndexedLoad(SDNode *Node);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8ace5d79af079b..e96e080b77c805 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1433,6 +1433,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
+  if (Subtarget.hasVendorXCVmem()) {
+    setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+    setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+    setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+
+    setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+    setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+    setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+  }
+
   // Function alignments.
   const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
   setMinFunctionAlignment(FunctionAlignment);
@@ -20909,6 +20919,26 @@ bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                      SDValue &Offset,
                                                      ISD::MemIndexedMode &AM,
                                                      SelectionDAG &DAG) const {
+  if (Subtarget.hasVendorXCVmem()) {
+    if (Op->getOpcode() != ISD::ADD)
+      return false;
+
+    if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
+      Base = LS->getBasePtr();
+    else
+      return false;
+
+    if (Base == Op->getOperand(0))
+      Offset = Op->getOperand(1);
+    else if (Base == Op->getOperand(1))
+      Offset = Op->getOperand(0);
+    else
+      return false;
+
+    AM = ISD::POST_INC;
+    return true;
+  }
+
   EVT VT;
   SDValue Ptr;
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index 6dae8ca8f7a84e..f0d6913a9d3fec 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -512,11 +512,13 @@ def CVrrAsmOperand : AsmOperandClass {
   let DiagnosticType = "InvalidRegReg";
 }
 
-def CVrr : Operand<OtherVT> {
+def CVrr : Operand<OtherVT>,
+           ComplexPattern<OtherVT, 2, "SelectAddrRegReg"> {
   let ParserMatchClass = CVrrAsmOperand;
   let EncoderMethod = "getRegReg";
   let DecoderMethod = "decodeRegReg";
   let PrintMethod = "printRegReg";
+  let MIOperandInfo = (ops GPR:$base, GPR:$offset);
 }
 
 class CVLoad_ri_inc<bits<3> funct3, string opcodestr>
@@ -659,6 +661,47 @@ let Predicates = [HasVendorXCVelw, IsRV32], hasSideEffects = 0,
   def CV_ELW : CVLoad_ri<0b011, "cv.elw">;
 }
 
+//===----------------------------------------------------------------------===//
+// Patterns for load & store operations
+//===----------------------------------------------------------------------===//
+class CVLdrrPat<PatFrag LoadOp, RVInst Inst>
+    : Pat<(XLenVT (LoadOp CVrr:$regreg)),
+          (Inst CVrr:$regreg)>;
+
+class CVStriPat<PatFrag StoreOp, RVInst Inst>
+    : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, simm12:$imm12),
+          (Inst GPR:$rs2, GPR:$rs1, simm12:$imm12)>;
+
+class CVStrriPat<PatFrag StoreOp, RVInst Inst>
+    : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, GPR:$rs3),
+          (Inst GPR:$rs2, GPR:$rs1, GPR:$rs3)>;
+
+class CVStrrPat<PatFrag StoreOp, RVInst Inst>
+    : Pat<(StoreOp (XLenVT GPR:$rs2), CVrr:$regreg),
+          (Inst GPR:$rs2, CVrr:$regreg)>;
+
+let Predicates = [HasVendorXCVmem, IsRV32], AddedComplexity = 1 in {
+  def : CVLdrrPat<sextloadi8, CV_LB_rr>;
+  def : CVLdrrPat<zextloadi8, CV_LBU_rr>;
+  def : CVLdrrPat<extloadi8, CV_LBU_rr>;
+  def : CVLdrrPat<sextloadi16, CV_LH_rr>;
+  def : CVLdrrPat<zextloadi16, CV_LHU_rr>;
+  def : CVLdrrPat<extloadi16, CV_LHU_rr>;
+  def : CVLdrrPat<load, CV_LW_rr>;
+
+  def : CVStriPat<post_truncsti8, CV_SB_ri_inc>;
+  def : CVStriPat<post_truncsti16, CV_SH_ri_inc>;
+  def : CVStriPat<post_store, CV_SW_ri_inc>;
+
+  def : CVStrriPat<post_truncsti8, CV_SB_rr_inc>;
+  def : CVStrriPat<post_truncsti16, CV_SH_rr_inc>;
+  def : CVStrriPat<post_store, CV_SW_rr_inc>;
+
+  def : CVStrrPat<truncstorei8, CV_SB_rr>;
+  def : CVStrrPat<truncstorei16, CV_SH_rr>;
+  def : CVStrrPat<store, CV_SW_rr>;
+}
+
 def cv_tuimm2 : TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]>;
 def cv_tuimm5 : TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]>;
 def cv_uimm10 : ImmLeaf<XLenVT, [{return isUInt<10>(Imm);}]>;
diff --git a/llvm/test/CodeGen/RISCV/xcvmem.ll b/llvm/test/CodeGen/RISCV/xcvmem.ll
new file mode 100644
index 00000000000000..037e49b9b0df7d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xcvmem.ll
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=riscv32 -mattr=+xcvmem -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK
+
+define <2 x i32> @lb_ri_inc(ptr %a) {
+; CHECK-LABEL: lb_ri_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lb a1, (a0), 42
+; CHECK-NEXT:    ret
+  %1 = load i8, ptr %a
+  %2 = sext i8 %1 to i32
+  %3 = getelementptr i8, ptr %a, i32 42
+  %4 = ptrtoint ptr %3 to i32
+  %5 = insertelement <2 x i32> undef, i32 %4, i32 0
+  %6 = insertelement <2 x i32> %5, i32 %2, i32 1
+  ret <2 x i32> %6
+}
+
+define <2 x i32> @lb_rr_inc(ptr %a, i32 %b) {
+; CHECK-LABEL: lb_rr_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lb a1, (a0), a1
+; CHECK-NEXT:    ret
+  %1 = load i8, ptr %a
+  %2 = sext i8 %1 to i32
+  %3 = getelementptr i8, ptr %a, i32 %b
+  %4 = ptrtoint ptr %3 to i32
+  %5 = insertelement <2 x i32> undef, i32 %4, i32 0
+  %6 = insertelement <2 x i32> %5, i32 %2, i32 1
+  ret <2 x i32> %6
+}
+
+define i32 @lb_rr(ptr %a, i32 %b) {
+; CHECK-LABEL: lb_rr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lb a0, a1(a0)
+; CHECK-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 %b
+  %2 = load i8, ptr %1
+  %3 = sext i8 %2 to i32
+  ret i32 %3
+}
+
+define <2 x i32> @lbu_ri_inc(ptr %a) {
+; CHECK-LABEL: lbu_ri_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lbu a1, (a0), 42
+; CHECK-NEXT:    ret
+  %1 = load i8, ptr %a
+  %2 = zext i8 %1 to i32
+  %3 = getelementptr i8, ptr %a, i32 42
+  %4 = ptrtoint ptr %3 to i32
+  %5 = insertelement <2 x i32> undef, i32 %4, i32 0
+  %6 = insertelement <2 x i32> %5, i32 %2, i32 1
+  ret <2 x i32> %6
+}
+
+define <2 x i32> @lbu_rr_inc(ptr %a, i32 %b) {
+; CHECK-LABEL: lbu_rr_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lbu a1, (a0), a1
+; CHECK-NEXT:    ret
+  %1 = load i8, ptr %a
+  %2 = zext i8 %1 to i32
+  %3 = getelementptr i8, ptr %a, i32 %b
+  %4 = ptrtoint ptr %3 to i32
+  %5 = insertelement <2 x i32> undef, i32 %4, i32 0
+  %6 = insertelement <2 x i32> %5, i32 %2, i32 1
+  ret <2 x i32> %6
+}
+
+define i32 @lbu_rr(ptr %a, i32 %b) {
+; CHECK-LABEL: lbu_rr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lbu a0, a1(a0)
+; CHECK-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 %b
+  %2 = load i8, ptr %1
+  %3 = zext i8 %2 to i32
+  ret i32 %3
+}
+
+define <2 x i32> @lh_ri_inc(ptr %a) {
+; CHECK-LABEL: lh_ri_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lh a1, (a0), 84
+; CHECK-NEXT:    ret
+  %1 = load i16, ptr %a
+  %2 = sext i16 %1 to i32
+  %3 = getelementptr i16, ptr %a, i32 42
+  %4 = ptrtoint ptr %3 to i32
+  %5 = insertelement <2 x i32> undef, i32 %4, i32 0
+  %6 = insertelement <2 x i32> %5, i32 %2, i32 1
+  ret <2 x i32> %6
+}
+
+define <2 x i32> @lh_rr_inc(ptr %a, i32 %b) {
+; CHECK-LABEL: lh_rr_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    cv.lh a1, (a0), a1
+; CHECK-NEXT:    ret
+  %1 = load i16, ptr %a
+  %2 = sext i16 %1 to i32
+  %3 = getelementptr i16, ptr %a, i32 %b
+  %4 = ptrtoint ptr %3 to i32
+  %5 = insertelement <2 x i32> undef, i32 %4, i32 0
+  %6 = insertelement <2 x i32> %5, i32 %2, i32 1
+  ret <2 x i32> %6
+}
+
+define i32 @lh_rr(ptr %a, i32 %b) {
+; CHECK-LABEL: lh_rr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    cv.lh a0, a1(a0)
+; CHECK-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 %b
+  %2 = load i16, ptr %1
+  %3 = sext i16 %2 to i32
+  ret i32 %3
+}
+
+define <2 x i32> @lhu_ri_inc(ptr %a) {
+; CHECK-LABEL: lhu_ri_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lhu a1, (a0), 84
+; CHECK-NEXT:    ret
+  %1 = load i16, ptr %a
+  %2 = zext i16 %1 to i32
+  %3 = getelementptr i16, ptr %a, i32 42
+  %4 = ptrtoint ptr %3 to i32
+  %5 = insertelement <2 x i32> undef, i32 %4, i32 0
+  %6 = insertelement <2 x i32> %5, i32 %2, i32 1
+  ret <2 x i32> %6
+}
+
+define <2 x i32> @lhu_rr_inc(ptr %a, i32 %b) {
+; CHECK-LABEL: lhu_rr_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    cv.lhu a1, (a0), a1
+; CHECK-NEXT:    ret
+  %1 = load i16, ptr %a
+  %2 = zext i16 %1 to i32
+  %3 = getelementptr i16, ptr %a, i32 %b
+  %4 = ptrtoint ptr %3 to i32
+  %5 = insertelement <2 x i32> undef, i32 %4, i32 0
+  %6 = insertelement <2 x i32> %5, i32 %2, i32 1
+  ret <2 x i32> %6
+}
+
+define i32 @lhu_rr(ptr %a, i32 %b) {
+; CHECK-LABEL: lhu_rr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    cv.lhu a0, a1(a0)
+; CHECK-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 %b
+  %2 = load i16, ptr %1
+  %3 = zext i16 %2 to i32
+  ret i32 %3
+}
+
+define <2 x i32> @lw_ri_inc(ptr %a) {
+; CHECK-LABEL: lw_ri_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.lw a1, (a0), 168
+; CHECK-NEXT:    ret
+  %1 = load i32, ptr %a
+  %2 = getelementptr i32, ptr %a, i32 42
+  %3 = ptrtoint ptr %2 to i32
+  %4 = insertelement <2 x i32> undef, i32 %3, i32 0
+  %5 = insertelement <2 x i32> %4, i32 %1, i32 1
+  ret <2 x i32> %5
+}
+
+define <2 x i32> @lw_rr_inc(ptr %a, i32 %b) {
+; CHECK-LABEL: lw_rr_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    cv.lw a1, (a0), a1
+; CHECK-NEXT:    ret
+  %1 = load i32, ptr %a
+  %2 = getelementptr i32, ptr %a, i32 %b
+  %3 = ptrtoint ptr %2 to i32
+  %4 = insertelement <2 x i32> undef, i32 %3, i32 0
+  %5 = insertelement <2 x i32> %4, i32 %1, i32 1
+  ret <2 x i32> %5
+}
+
+define i32 @lw_rr(ptr %a, i32 %b) {
+; CHECK-LABEL: lw_rr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    cv.lw a0, a1(a0)
+; CHECK-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i32 %b
+  %2 = load i32, ptr %1
+  ret i32 %2
+}
+
+define ptr @sb_ri_inc(ptr %a, i8 %b) {
+; CHECK-LABEL: sb_ri_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.sb a1, (a0), 42
+; CHECK-NEXT:    ret
+  store i8 %b, ptr %a
+  %1 = getelementptr i8, ptr %a, i32 42
+  ret ptr %1
+}
+
+define ptr @sb_rr_inc(ptr %a, i8 %b, i32 %c) {
+; CHECK-LABEL: sb_rr_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.sb a1, (a0), a2
+; CHECK-NEXT:    ret
+  store i8 %b, ptr %a
+  %1 = getelementptr i8, ptr %a, i32 %c
+  ret ptr %1
+}
+
+define void @sb_rr(ptr %a, i8 %b, i32 %c) {
+; CHECK-LABEL: sb_rr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.sb a1, a2(a0)
+; CHECK-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 %c
+  store i8 %b, ptr %1
+  ret void
+}
+
+define ptr @sh_ri_inc(ptr %a, i16 %b) {
+; CHECK-LABEL: sh_ri_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.sh a1, (a0), 84
+; CHECK-NEXT:    ret
+  store i16 %b, ptr %a
+  %1 = getelementptr i16, ptr %a, i32 42
+  ret ptr %1
+}
+
+define ptr @sh_rr_inc(ptr %a, i16 %b, i32 %c) {
+; CHECK-LABEL: sh_rr_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    cv.sh a1, (a0), a2
+; CHECK-NEXT:    ret
+  store i16 %b, ptr %a
+  %1 = getelementptr i16, ptr %a, i32 %c
+  ret ptr %1
+}
+
+define void @sh_rr(ptr %a, i16 %b, i32 %c) {
+; CHECK-LABEL: sh_rr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    cv.sh a1, a2(a0)
+; CHECK-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 %c
+  store i16 %b, ptr %1
+  ret void
+}
+
+define ptr @sw_ri_inc(ptr %a, i32 %b) {
+; CHECK-LABEL: sw_ri_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.sw a1, (a0), 168
+; CHECK-NEXT:    ret
+  store i32 %b, ptr %a
+  %1 = getelementptr i32, ptr %a, i32 42
+  ret ptr %1
+}
+
+define ptr @sw_rr_inc(ptr %a, i32 %b, i32 %c) {
+; CHECK-LABEL: sw_rr_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a2, a2, 2
+; CHECK-NEXT:    cv.sw a1, (a0), a2
+; CHECK-NEXT:    ret
+  store i32 %b, ptr %a
+  %1 = getelementptr i32, ptr %a, i32 %c
+  ret ptr %1
+}
+
+define void @sw_rr(ptr %a, i32 %b, i32 %c) {
+; CHECK-LABEL: sw_rr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a2, a2, 2
+; CHECK-NEXT:    cv.sw a1, a2(a0)
+; CHECK-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i32 %c
+  store i32 %b, ptr %1
+  ret void
+}
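
Illustrative note, not part of the patch: the commit message only points at the XCVmem
spec, so here is a minimal C sketch of the kind of source these patterns are aimed at.
The function names and the exact codegen are assumptions for illustration only; the
authoritative expectations are the CHECK lines in llvm/test/CodeGen/RISCV/xcvmem.ll
above. Assuming a riscv32 target built with -mattr=+xcvmem:

    /* Hypothetical example: each iteration loads a byte, stores it, and bumps
       both pointers by one. With the POST_INC hooks added in
       getPostIndexedAddressParts(), these accesses become candidates for
       cv.lbu/cv.sb with a post-increment instead of separate addi updates. */
    void copy_bytes(unsigned char *dst, const unsigned char *src, int n) {
      for (int i = 0; i < n; ++i)
        *dst++ = *src++;
    }

    /* Hypothetical example: a scaled indexed access such as a[i] is a candidate
       for the register-register addressing form selected via SelectAddrRegReg
       (slli followed by cv.lw, as in the lw_rr test). */
    int load_indexed(const int *a, int i) {
      return a[i];
    }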