Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Improve use of BSETI/BCLRI in constant materialization. #91546

Merged
merged 6 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 36 additions & 44 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/Debug.h"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these header files are required.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They aren't. I was using them for debug printing and forgot to remove them

#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
Expand Down Expand Up @@ -310,56 +312,46 @@ InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) {
}
}

// Perform optimization with BCLRI/BSETI in the Zbs extension.
// Perform optimization with BSETI in the Zbs extension.
if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
// 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
// call generateInstSeqImpl with Val|0x80000000 (which is expected be
// an int32), then emit (BCLRI r, 31).
// 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
// with Val&~0x80000000 (which is expected to be an int32), then
// emit (BSETI r, 31).
int64_t NewVal;
unsigned Opc;
if (Val < 0) {
Opc = RISCV::BCLRI;
NewVal = Val | 0x80000000ll;
} else {
Opc = RISCV::BSETI;
NewVal = Val & ~0x80000000ll;
}
if (isInt<32>(NewVal)) {
RISCVMatInt::InstSeq TmpSeq;
generateInstSeqImpl(NewVal, STI, TmpSeq);
if ((TmpSeq.size() + 1) < Res.size()) {
TmpSeq.emplace_back(Opc, 31);
Res = TmpSeq;
}
// Create a simm32 value for LUI+ADDIW with by forcing the upper 33 bits to
// zero. Xor that with original value to get which bits need to be set by
// BSETI.
uint64_t Lo = Val & 0x7fffffff;
uint64_t Hi = Val ^ Lo;
assert(Hi != 0);
RISCVMatInt::InstSeq TmpSeq;

if (Lo != 0)
generateInstSeqImpl(Lo, STI, TmpSeq);

if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
do {
TmpSeq.emplace_back(RISCV::BSETI, llvm::countr_zero(Hi));
Hi &= (Hi - 1); // Clear lowest set bit.
} while (Hi != 0);
Res = TmpSeq;
}
}

// Perform optimization with BCLRI in the Zbs extension.
if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
// Create a simm32 value for LUI+ADDIW with by forcing the upper 33 bits to
// one. Xor that with original value to get which bits need to be cleared by
// BCLRI.
uint64_t Lo = Val | 0xffffffff80000000;
uint64_t Hi = Val ^ Lo;
assert(Hi != 0);

// Try to use BCLRI for upper 32 bits if the original lower 32 bits are
// negative int32, or use BSETI for upper 32 bits if the original lower
// 32 bits are positive int32.
int32_t Lo = Lo_32(Val);
uint32_t Hi = Hi_32(Val);
Opc = 0;
RISCVMatInt::InstSeq TmpSeq;
generateInstSeqImpl(Lo, STI, TmpSeq);
// Check if it is profitable to use BCLRI/BSETI.
if (Lo > 0 && TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
Opc = RISCV::BSETI;
} else if (Lo < 0 && TmpSeq.size() + llvm::popcount(~Hi) < Res.size()) {
Opc = RISCV::BCLRI;
Hi = ~Hi;
}
// Search for each bit and build corresponding BCLRI/BSETI.
if (Opc > 0) {
while (Hi != 0) {
unsigned Bit = llvm::countr_zero(Hi);
TmpSeq.emplace_back(Opc, Bit + 32);

if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
do {
TmpSeq.emplace_back(RISCV::BCLRI, llvm::countr_zero(Hi));
Hi &= (Hi - 1); // Clear lowest set bit.
}
if (TmpSeq.size() < Res.size())
Res = TmpSeq;
} while (Hi != 0);
Res = TmpSeq;
}
}

Expand Down
17 changes: 7 additions & 10 deletions llvm/test/CodeGen/RISCV/imm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4025,9 +4025,8 @@ define i64 @imm64_0x8000080000000() {
;
; RV64IZBS-LABEL: imm64_0x8000080000000:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 256
; RV64IZBS-NEXT: addiw a0, a0, 1
; RV64IZBS-NEXT: slli a0, a0, 31
; RV64IZBS-NEXT: bseti a0, zero, 31
; RV64IZBS-NEXT: bseti a0, a0, 51
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm64_0x8000080000000:
Expand Down Expand Up @@ -4083,9 +4082,8 @@ define i64 @imm64_0x10000100000000() {
;
; RV64IZBS-LABEL: imm64_0x10000100000000:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 256
; RV64IZBS-NEXT: addi a0, a0, 1
; RV64IZBS-NEXT: slli a0, a0, 32
; RV64IZBS-NEXT: bseti a0, zero, 32
; RV64IZBS-NEXT: bseti a0, a0, 52
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm64_0x10000100000000:
Expand Down Expand Up @@ -4146,10 +4144,9 @@ define i64 @imm64_0xFF7FFFFF7FFFFFFE() {
;
; RV64IZBS-LABEL: imm64_0xFF7FFFFF7FFFFFFE:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 1044480
; RV64IZBS-NEXT: addiw a0, a0, -1
; RV64IZBS-NEXT: slli a0, a0, 31
; RV64IZBS-NEXT: addi a0, a0, -1
; RV64IZBS-NEXT: li a0, -1
; RV64IZBS-NEXT: bclri a0, a0, 31
; RV64IZBS-NEXT: bclri a0, a0, 55
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm64_0xFF7FFFFF7FFFFFFE:
Expand Down
17 changes: 7 additions & 10 deletions llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2648,9 +2648,8 @@ define i64 @imm64_0x8000080000000() {
;
; RV64IZBS-LABEL: imm64_0x8000080000000:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 256
; RV64IZBS-NEXT: addiw a0, a0, 1
; RV64IZBS-NEXT: slli a0, a0, 31
; RV64IZBS-NEXT: bseti a0, zero, 31
; RV64IZBS-NEXT: bseti a0, a0, 51
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm64_0x8000080000000:
Expand Down Expand Up @@ -2686,9 +2685,8 @@ define i64 @imm64_0x10000100000000() {
;
; RV64IZBS-LABEL: imm64_0x10000100000000:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 256
; RV64IZBS-NEXT: addi a0, a0, 1
; RV64IZBS-NEXT: slli a0, a0, 32
; RV64IZBS-NEXT: bseti a0, zero, 32
; RV64IZBS-NEXT: bseti a0, a0, 52
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm64_0x10000100000000:
Expand Down Expand Up @@ -2727,10 +2725,9 @@ define i64 @imm64_0xFF7FFFFF7FFFFFFE() {
;
; RV64IZBS-LABEL: imm64_0xFF7FFFFF7FFFFFFE:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 1044480
; RV64IZBS-NEXT: addiw a0, a0, -1
; RV64IZBS-NEXT: slli a0, a0, 31
; RV64IZBS-NEXT: addi a0, a0, -1
; RV64IZBS-NEXT: li a0, -1
; RV64IZBS-NEXT: bclri a0, a0, 31
; RV64IZBS-NEXT: bclri a0, a0, 55
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm64_0xFF7FFFFF7FFFFFFE:
Expand Down
Loading