From 3eaed9e6f574f59d76389c055b047ef5c50afb8a Mon Sep 17 00:00:00 2001
From: melonedo <44501064+melonedo@users.noreply.github.com>
Date: Sun, 17 Dec 2023 19:29:40 +0800
Subject: [PATCH] [RISCV] Implement intrinsics for XCVbitmanip extension in
 CV32E40P (#74993)

Implement XCVbitmanip intrinsics for CV32E40P according to the
specification.

This commit is part of a patch set to upstream the vendor-specific
extensions of CV32E40P that need LLVM intrinsics to implement Clang
builtins.

Contributors: @CharKeaney, @ChunyuLiao, @jeremybennett, @lewis-revill,
@NandniJamnadas, @PaoloS02, @simonpcook, @xingmingjie.

Spec: https://github.com/openhwgroup/core-v-sw/blob/05481cf0ef7aa7b09067b14ff3f71faead7ba310/specifications/corev-builtin-spec.md#listing-of-pulp-bit-manipulation-builtins-xcvbitmanip

Previously reviewed on Phabricator: https://reviews.llvm.org/D157510

Parallel GCC patch: https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635795.html

Co-authored-by: melonedo
---
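Note for reviewers (below the fold, not committed): the immediate forms
of the extract/extractu/bclr/bset/insert patterns take both 5-bit fields
packed into a single 10-bit intrinsic operand, which the CV_HI5/CV_LO5
SDNodeXForms in this patch split as imm >> 5 and imm & 0x1f. A minimal
caller-side sketch mirroring the added tests (the function name and the
chosen constant are illustrative only, not part of the patch):

  declare i32 @llvm.riscv.cv.bitmanip.extract(i32, i32)

  ; 100 = (3 << 5) | 4, so this should select "cv.extract a0, a0, 3, 4";
  ; a non-constant second operand selects the register form, cv.extractr.
  define i32 @example(i32 %x) {
    %r = call i32 @llvm.riscv.cv.bitmanip.extract(i32 %x, i32 100)
    ret i32 %r
  }
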
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |   1 +
 llvm/include/llvm/IR/IntrinsicsRISCVXCV.td    |  37 +++
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  23 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td    |  48 +++-
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |   4 +-
 llvm/test/CodeGen/RISCV/xcvbitmanip.ll        | 231 ++++++++++++++++++
 6 files changed, 335 insertions(+), 9 deletions(-)
 create mode 100644 llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
 create mode 100644 llvm/test/CodeGen/RISCV/xcvbitmanip.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 20c6a525a86ba7..fc830fca392fc5 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1879,3 +1879,4 @@ let TargetPrefix = "riscv" in {
 //===----------------------------------------------------------------------===//
 include "llvm/IR/IntrinsicsRISCVXTHead.td"
 include "llvm/IR/IntrinsicsRISCVXsf.td"
+include "llvm/IR/IntrinsicsRISCVXCV.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td b/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
new file mode 100644
index 00000000000000..f1590ad66e362b
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
@@ -0,0 +1,37 @@
+//===- IntrinsicsRISCVXCV.td - CORE-V intrinsics -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the CORE-V vendor intrinsics for RISC-V.
+//
+//===----------------------------------------------------------------------===//
+
+class ScalarCoreVBitManipGprGprIntrinsic
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+                            [IntrNoMem, IntrSpeculatable]>;
+
+class ScalarCoreVBitManipGprIntrinsic
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                            [IntrNoMem, IntrSpeculatable]>;
+
+let TargetPrefix = "riscv" in {
+  def int_riscv_cv_bitmanip_extract : ScalarCoreVBitManipGprGprIntrinsic;
+  def int_riscv_cv_bitmanip_extractu : ScalarCoreVBitManipGprGprIntrinsic;
+  def int_riscv_cv_bitmanip_bclr : ScalarCoreVBitManipGprGprIntrinsic;
+  def int_riscv_cv_bitmanip_bset : ScalarCoreVBitManipGprGprIntrinsic;
+
+  def int_riscv_cv_bitmanip_insert
+      : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                              [IntrNoMem, IntrSpeculatable]>;
+
+  def int_riscv_cv_bitmanip_clb : ScalarCoreVBitManipGprIntrinsic;
+
+  def int_riscv_cv_bitmanip_bitrev
+      : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                              [IntrNoMem, IntrWillReturn, IntrSpeculatable,
+                               ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
+} // TargetPrefix = "riscv"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4a8ff73ec47295..782a9e1db569f5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -335,6 +335,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     if (Subtarget.is64Bit())
       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
+  } else if (Subtarget.hasVendorXCVbitmanip()) {
+    setOperationAction({ISD::ROTL}, XLenVT, Expand);
   } else {
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
     if (RV64LegalI32 && Subtarget.is64Bit())
@@ -355,9 +357,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                              ? Promote
                              : Expand);
-  // Zbkb can use rev8+brev8 to implement bitreverse.
-  setOperationAction(ISD::BITREVERSE, XLenVT,
-                     Subtarget.hasStdExtZbkb() ? Custom : Expand);
+
+  if (Subtarget.hasVendorXCVbitmanip()) {
+    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
+  } else {
+    // Zbkb can use rev8+brev8 to implement bitreverse.
+    setOperationAction(ISD::BITREVERSE, XLenVT,
+                       Subtarget.hasStdExtZbkb() ? Custom : Expand);
+  }
 
   if (Subtarget.hasStdExtZbb()) {
     setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
@@ -372,13 +379,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     else
       setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
     }
-  } else {
+  } else if (!Subtarget.hasVendorXCVbitmanip()) {
     setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
     if (RV64LegalI32 && Subtarget.is64Bit())
       setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
   }
 
-  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
+  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
+      Subtarget.hasVendorXCVbitmanip()) {
     // We need the custom lowering to make sure that the resulting sequence
     // for the 32bit case is efficient on 64bit targets.
     if (Subtarget.is64Bit()) {
@@ -1796,11 +1804,12 @@ bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
 }
 
 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
-  return Subtarget.hasStdExtZbb();
+  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
 }
 
 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
-  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
+  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
+         Subtarget.hasVendorXCVbitmanip();
 }
 
 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index 6622e811bbb86d..924e91e15c348f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -33,7 +33,7 @@ let DecoderNamespace = "XCVbitmanip" in {
   class CVBitManipR<bits<7> funct7, string opcodestr>
       : RVInstR<funct7, 0b011, OPC_CUSTOM_1, (outs GPR:$rd),
-                (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1"> {
+                (ins GPR:$rs1), opcodestr, "$rd, $rs1"> {
     let rs2 = 0b00000;
   }
 }
@@ -658,3 +658,49 @@ let Predicates = [HasVendorXCVelw, IsRV32], hasSideEffects = 0,
 // Event load
 def CV_ELW : CVLoad_ri<0b011, "cv.elw">;
 }
+
+def cv_tuimm2 : TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]>;
+def cv_tuimm5 : TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]>;
+def cv_uimm10 : ImmLeaf<XLenVT, [{return isUInt<10>(Imm);}]>;
+
+def CV_LO5: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() & 0x1f, SDLoc(N),
+                                   N->getValueType(0));
+}]>;
+
+def CV_HI5: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() >> 5, SDLoc(N),
+                                   N->getValueType(0));
+}]>;
+
+multiclass PatCoreVBitManip<Intrinsic intr> {
+  def : PatGprGpr<intr, !cast<RVInst>("CV_" # NAME # "R")>;
+  def : Pat<(intr GPR:$rs1, cv_uimm10:$imm),
+            (!cast<RVInst>("CV_" # NAME)
+             GPR:$rs1, (CV_HI5 cv_uimm10:$imm), (CV_LO5 cv_uimm10:$imm))>;
+}
+
+let Predicates = [HasVendorXCVbitmanip, IsRV32] in {
+  defm EXTRACT : PatCoreVBitManip<int_riscv_cv_bitmanip_extract>;
+  defm EXTRACTU : PatCoreVBitManip<int_riscv_cv_bitmanip_extractu>;
+  defm BCLR : PatCoreVBitManip<int_riscv_cv_bitmanip_bclr>;
+  defm BSET : PatCoreVBitManip<int_riscv_cv_bitmanip_bset>;
+
+  def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, GPR:$rs2, GPR:$rd),
+            (CV_INSERTR GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, cv_uimm10:$imm, GPR:$rd),
+            (CV_INSERT GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm),
+             (CV_LO5 cv_uimm10:$imm))>;
+
+  def : PatGpr<cttz, CV_FF1>;
+  def : PatGpr<ctlz, CV_FL1>;
+  def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>;
+  def : PatGpr<ctpop, CV_CNT>;
+
+  def : PatGprGpr<rotr, CV_ROR>;
+
+  def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts,
+             cv_tuimm2:$radix),
+            (CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>;
+  def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3a2f2f39cd1c9b..4614446b2150b7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -172,7 +172,9 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
 TargetTransformInfo::PopcntSupportKind
 RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
+  return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
+             ? TTI::PSK_FastHardware
+             : TTI::PSK_Software;
 }
 
 bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
diff --git a/llvm/test/CodeGen/RISCV/xcvbitmanip.ll b/llvm/test/CodeGen/RISCV/xcvbitmanip.ll
new file mode 100644
index 00000000000000..d25ff28475c4b7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xcvbitmanip.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=riscv32 -mattr=+xcvbitmanip -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-O0
+; RUN: llc -O3 -mtriple=riscv32 -mattr=+xcvbitmanip -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-O3
+
+declare i32 @llvm.riscv.cv.bitmanip.extract(i32, i32)
+
+define i32 @test.cv.extractr(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.extractr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extractr a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extract(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+define i32 @test.cv.extract(i32 %a) {
+; CHECK-LABEL: test.cv.extract:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extract a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extract(i32 %a, i32 65)
+  ret i32 %1
+}
+
+define i32 @test.cv.extract1023(i32 %a) {
+; CHECK-LABEL: test.cv.extract1023:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extract a0, a0, 31, 31
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extract(i32 %a, i32 1023)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.extractu(i32, i32)
+
+define i32 @test.cv.extractur(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.extractur:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extractur a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extractu(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+define i32 @test.cv.extractu(i32 %a) {
+; CHECK-LABEL: test.cv.extractu:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extractu a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extractu(i32 %a, i32 65)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.insert(i32, i32, i32)
+
+define i32 @test.cv.insert(i32 %c, i32 %a) {
+; CHECK-LABEL: test.cv.insert:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.insert a0, a1, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.insert(i32 %a, i32 65, i32 %c)
+  ret i32 %1
+}
+
+define i32 @test.cv.insertr(i32 %c, i32 %b, i32 %a) {
+; CHECK-LABEL: test.cv.insertr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.insertr a0, a2, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.insert(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.bclr(i32, i32)
+
+define i32 @test.cv.bclrr(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.bclrr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bclrr a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bclr(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+define i32 @test.cv.bclr(i32 %a) {
+; CHECK-LABEL: test.cv.bclr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bclr a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bclr(i32 %a, i32 65)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.bset(i32, i32)
+
+define i32 @test.cv.bsetr(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.bsetr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bsetr a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bset(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+define i32 @test.cv.bset(i32 %a) {
+; CHECK-LABEL: test.cv.bset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bset a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bset(i32 %a, i32 65)
+  ret i32 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i32 @test.cv.ff1(i32 %a) {
+; CHECK-LABEL: test.cv.ff1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.ff1 a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 0)
+  ret i32 %1
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define i32 @test.cv.fl1(i32 %a) {
+; CHECK-LABEL: test.cv.fl1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.fl1 a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.clb(i32)
+
+define i32 @test.cv.clb(i32 %a) {
+; CHECK-LABEL: test.cv.clb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.clb a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.clb(i32 %a)
+  ret i32 %1
+}
+
+declare i32 @llvm.ctpop(i32)
+
+define i32 @test.cv.cnt(i32 %a) {
+; CHECK-LABEL: test.cv.cnt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.cnt a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.ctpop(i32 %a)
+  ret i32 %1
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define i32 @test.llvm.fshl.imm(i32 %a) {
+; CHECK-LABEL: test.llvm.fshl.imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 30
+; CHECK-NEXT:    cv.ror a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 2)
+  ret i32 %1
+}
+
+define i32 @test.llvm.fshl.reg(i32 %a, i32 %b) {
+; CHECK-O0-LABEL: test.llvm.fshl.reg:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    mv a2, a1
+; CHECK-O0-NEXT:    li a1, 0
+; CHECK-O0-NEXT:    sub a1, a1, a2
+; CHECK-O0-NEXT:    cv.ror a0, a0, a1
+; CHECK-O0-NEXT:    ret
+;
+; CHECK-O3-LABEL: test.llvm.fshl.reg:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    neg a1, a1
+; CHECK-O3-NEXT:    cv.ror a0, a0, a1
+; CHECK-O3-NEXT:    ret
+  %1 = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define i32 @test.llvm.fshr.imm(i32 %a) {
+; CHECK-LABEL: test.llvm.fshr.imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 2
+; CHECK-NEXT:    cv.ror a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 2)
+  ret i32 %1
+}
+
+define i32 @test.llvm.fshr.reg(i32 %a, i32 %b) {
+; CHECK-LABEL: test.llvm.fshr.reg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.ror a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.bitrev(i32, i32, i32)
+
+define i32 @test.cv.bitrev(i32 %a) {
+; CHECK-LABEL: test.cv.bitrev:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bitrev a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bitrev(i32 %a, i32 1, i32 2)
+  ret i32 %1
+}
+
+declare i32 @llvm.bitreverse(i32)
+
+define i32 @test.llvm.bitrev(i32 %a) {
+; CHECK-LABEL: test.llvm.bitrev:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bitrev a0, a0, 0, 0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.bitreverse(i32 %a)
+  ret i32 %1
+}
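
Postscript, not part of the upstream patch: the operand order of
llvm.riscv.cv.bitmanip.insert is easy to misread. The intrinsic takes
(source, control, destination); the patterns above move the third
operand into the instruction's destination register, which both supplies
the untouched bits and receives the result, as test.cv.insertr shows.
A minimal caller-side sketch under that reading (illustrative names):

  declare i32 @llvm.riscv.cv.bitmanip.insert(i32, i32, i32)

  ; 65 = (2 << 5) | 1, so this should select "cv.insert a0, a1, 2, 1",
  ; where a0 initially holds %dst and holds the result on return.
  define i32 @insert_example(i32 %dst, i32 %src) {
    %r = call i32 @llvm.riscv.cv.bitmanip.insert(i32 %src, i32 65, i32 %dst)
    ret i32 %r
  }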