Skip to content

Commit

Permalink
[MC] Aligned bundling: remove special handling for RelaxAll
Browse files Browse the repository at this point in the history
When both aligned bundling and RelaxAll are enabled, bundle padding is
directly written into fragments (https://reviews.llvm.org/D8072).
(The original motivation was memory usage, which has been achieved from
different angles with recent assembler improvement).

The code presents challenges with the work to replace fragment
representation (e.g. #94950 #95077). This patch removes the special
handling. RelaxAll still works but the behavior seems slightly different
as revealed by 2 changed tests. However, most `-mc-relax-all` tests are
unchanged.

RelaxAll used to be the default for clang -O0. This mode has significant
code size drawbacks and newer Clang doesn't use it (#90013).

---

flushPendingLabels: The FOffset parameter can be removed: pending labels
will be assigned to the incoming fragment at offset 0.

Pull Request: #95188
  • Loading branch information
MaskRay authored Jun 14, 2024
1 parent 72b841d commit b1932b8
Show file tree
Hide file tree
Showing 10 changed files with 13 additions and 141 deletions.
8 changes: 0 additions & 8 deletions llvm/include/llvm/MC/MCELFStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ class MCELFStreamer : public MCObjectStreamer {
/// state management
void reset() override {
SeenIdent = false;
BundleGroups.clear();
MCObjectStreamer::reset();
}

Expand Down Expand Up @@ -142,14 +141,7 @@ class MCELFStreamer : public MCObjectStreamer {
void finalizeCGProfileEntry(const MCSymbolRefExpr *&S, uint64_t Offset);
void finalizeCGProfile();

/// Merge the content of the fragment \p EF into the fragment \p DF.
void mergeFragment(MCDataFragment *, MCDataFragment *);

bool SeenIdent = false;

/// BundleGroups - The stack of fragments holding the bundle-locked
/// instructions.
SmallVector<MCDataFragment *, 4> BundleGroups;
};

MCELFStreamer *createARMELFStreamer(MCContext &Context,
Expand Down
3 changes: 1 addition & 2 deletions llvm/include/llvm/MC/MCSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,7 @@ class MCSection {
void addPendingLabel(MCSymbol* label, unsigned Subsection = 0);

/// Associate all pending labels in a subsection with a fragment.
void flushPendingLabels(MCFragment *F, uint64_t FOffset = 0,
unsigned Subsection = 0);
void flushPendingLabels(MCFragment *F, unsigned Subsection);

/// Associate all pending labels with empty data fragments. One fragment
/// will be created for each subsection as necessary.
Expand Down
3 changes: 0 additions & 3 deletions llvm/include/llvm/MC/MCWasmStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,6 @@ class MCWasmStreamer : public MCObjectStreamer {

void fixSymbolsInTLSFixups(const MCExpr *expr);

/// Merge the content of the fragment \p EF into the fragment \p DF.
void mergeFragment(MCDataFragment *, MCDataFragment *);

bool SeenIdent;
};

Expand Down
8 changes: 1 addition & 7 deletions llvm/lib/MC/MCAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,12 +420,6 @@ void MCAsmLayout::layoutBundle(MCFragment *F) {
// The fragment's offset will point to after the padding, and its computed
// size won't include the padding.
//
// When the -mc-relax-all flag is used, we optimize bundling by writting the
// padding directly into fragments when the instructions are emitted inside
// the streamer. When the fragment is larger than the bundle size, we need to
// ensure that it's bundle aligned. This means that if we end up with
// multiple fragments, we must emit bundle padding between fragments.
//
// ".align N" is an example of a directive that introduces multiple
// fragments. We could add a special case to handle ".align N" by emitting
// within-fragment padding (which would produce less padding when N is less
Expand All @@ -436,7 +430,7 @@ void MCAsmLayout::layoutBundle(MCFragment *F) {
MCEncodedFragment *EF = cast<MCEncodedFragment>(F);
uint64_t FSize = Assembler.computeFragmentSize(*this, *EF);

if (!Assembler.getRelaxAll() && FSize > Assembler.getBundleAlignSize())
if (FSize > Assembler.getBundleAlignSize())
report_fatal_error("Fragment can't be larger than a bundle size");

uint64_t RequiredBundlePadding =
Expand Down
89 changes: 3 additions & 86 deletions llvm/lib/MC/MCELFStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,44 +50,6 @@ bool MCELFStreamer::isBundleLocked() const {
return getCurrentSectionOnly()->isBundleLocked();
}

void MCELFStreamer::mergeFragment(MCDataFragment *DF,
MCDataFragment *EF) {
MCAssembler &Assembler = getAssembler();

if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
uint64_t FSize = EF->getContents().size();

if (FSize > Assembler.getBundleAlignSize())
report_fatal_error("Fragment can't be larger than a bundle size");

uint64_t RequiredBundlePadding = computeBundlePadding(
Assembler, EF, DF->getContents().size(), FSize);

if (RequiredBundlePadding > UINT8_MAX)
report_fatal_error("Padding cannot exceed 255 bytes");

if (RequiredBundlePadding > 0) {
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
Assembler.writeFragmentPadding(VecOS, *EF, FSize);

DF->getContents().append(Code.begin(), Code.end());
}
}

flushPendingLabels(DF, DF->getContents().size());

for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) {
EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() +
DF->getContents().size());
DF->getFixups().push_back(EF->getFixups()[i]);
}
if (DF->getSubtargetInfo() == nullptr && EF->getSubtargetInfo())
DF->setHasInstructions(*EF->getSubtargetInfo());
DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
}

void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
MCContext &Ctx = getContext();
switchSection(Ctx.getObjectFileInfo()->getTextSection());
Expand Down Expand Up @@ -575,24 +537,12 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst,

if (Assembler.isBundlingEnabled()) {
MCSection &Sec = *getCurrentSectionOnly();
if (Assembler.getRelaxAll() && isBundleLocked()) {
// If the -mc-relax-all flag is used and we are bundle-locked, we re-use
// the current bundle group.
DF = BundleGroups.back();
CheckBundleSubtargets(DF->getSubtargetInfo(), &STI);
}
else if (Assembler.getRelaxAll() && !isBundleLocked())
// When not in a bundle-locked group and the -mc-relax-all flag is used,
// we create a new temporary fragment which will be later merged into
// the current fragment.
DF = getContext().allocFragment<MCDataFragment>();
else if (isBundleLocked() && !Sec.isBundleGroupBeforeFirstInst()) {
if (isBundleLocked() && !Sec.isBundleGroupBeforeFirstInst()) {
// If we are bundle-locked, we re-use the current fragment.
// The bundle-locking directive ensures this is a new data fragment.
DF = cast<MCDataFragment>(getCurrentFragment());
CheckBundleSubtargets(DF->getSubtargetInfo(), &STI);
}
else if (!isBundleLocked() && Fixups.size() == 0) {
} else if (!isBundleLocked() && Fixups.size() == 0) {
// Optimize memory usage by emitting the instruction to a
// MCCompactEncodedInstFragment when not in a bundle-locked group and
// there are no fixups registered.
Expand Down Expand Up @@ -632,13 +582,6 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst,
getAssembler().getBackend().RelaxFixupKind)
DF->setLinkerRelaxable();
DF->getContents().append(Code.begin(), Code.end());

if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
if (!isBundleLocked()) {
mergeFragment(getOrCreateDataFragment(&STI), DF);
delete DF;
}
}
}

void MCELFStreamer::emitBundleAlignMode(Align Alignment) {
Expand All @@ -660,12 +603,6 @@ void MCELFStreamer::emitBundleLock(bool AlignToEnd) {
if (!isBundleLocked())
Sec.setBundleGroupBeforeFirstInst(true);

if (getAssembler().getRelaxAll() && !isBundleLocked()) {
// TODO: drop the lock state and set directly in the fragment
MCDataFragment *DF = getContext().allocFragment<MCDataFragment>();
BundleGroups.push_back(DF);
}

Sec.setBundleLockState(AlignToEnd ? MCSection::BundleLockedAlignToEnd
: MCSection::BundleLocked);
}
Expand All @@ -680,27 +617,7 @@ void MCELFStreamer::emitBundleUnlock() {
else if (Sec.isBundleGroupBeforeFirstInst())
report_fatal_error("Empty bundle-locked group is forbidden");

// When the -mc-relax-all flag is used, we emit instructions to fragments
// stored on a stack. When the bundle unlock is emitted, we pop a fragment
// from the stack a merge it to the one below.
if (getAssembler().getRelaxAll()) {
assert(!BundleGroups.empty() && "There are no bundle groups");
MCDataFragment *DF = BundleGroups.back();

// FIXME: Use BundleGroups to track the lock state instead.
Sec.setBundleLockState(MCSection::NotBundleLocked);

// FIXME: Use more separate fragments for nested groups.
if (!isBundleLocked()) {
mergeFragment(getOrCreateDataFragment(DF->getSubtargetInfo()), DF);
BundleGroups.pop_back();
delete DF;
}

if (Sec.getBundleLockState() != MCSection::BundleLockedAlignToEnd)
getOrCreateDataFragment()->setAlignToBundleEnd(false);
} else
Sec.setBundleLockState(MCSection::NotBundleLocked);
Sec.setBundleLockState(MCSection::NotBundleLocked);
}

void MCELFStreamer::finishImpl() {
Expand Down
10 changes: 3 additions & 7 deletions llvm/lib/MC/MCObjectStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
}

// Associate the labels with F.
CurSection->flushPendingLabels(F, FOffset, CurSubsectionIdx);
CurSection->flushPendingLabels(F, CurSubsectionIdx);
}

void MCObjectStreamer::flushPendingLabels() {
Expand Down Expand Up @@ -215,7 +215,7 @@ static bool canReuseDataFragment(const MCDataFragment &F,
// When bundling is enabled, we don't want to add data to a fragment that
// already has instructions (see MCELFStreamer::emitInstToData for details)
if (Assembler.isBundlingEnabled())
return Assembler.getRelaxAll();
return false;
// If the subtarget is changed mid fragment we start a new fragment to record
// the new STI.
return !STI || F.getSubtargetInfo() == STI;
Expand Down Expand Up @@ -292,8 +292,7 @@ void MCObjectStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
// Otherwise queue the label and set its fragment pointer when we emit the
// next fragment.
auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
if (F && !(getAssembler().isBundlingEnabled() &&
getAssembler().getRelaxAll())) {
if (F) {
Symbol->setFragment(F);
Symbol->setOffset(F->getContents().size());
} else {
Expand Down Expand Up @@ -465,9 +464,6 @@ void MCObjectStreamer::emitInstructionImpl(const MCInst &Inst,

void MCObjectStreamer::emitInstToFragment(const MCInst &Inst,
const MCSubtargetInfo &STI) {
if (getAssembler().getRelaxAll() && getAssembler().isBundlingEnabled())
llvm_unreachable("All instructions should have already been relaxed");

// Always create a new, separate fragment here, because its size can change
// during relaxation.
MCRelaxableFragment *IF =
Expand Down
7 changes: 3 additions & 4 deletions llvm/lib/MC/MCSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,14 @@ void MCSection::addPendingLabel(MCSymbol *label, unsigned Subsection) {
PendingLabels.push_back(PendingLabel(label, Subsection));
}

void MCSection::flushPendingLabels(MCFragment *F, uint64_t FOffset,
unsigned Subsection) {
void MCSection::flushPendingLabels(MCFragment *F, unsigned Subsection) {
// Set the fragment and fragment offset for all pending symbols in the
// specified Subsection, and remove those symbols from the pending list.
for (auto It = PendingLabels.begin(); It != PendingLabels.end(); ++It) {
PendingLabel& Label = *It;
if (Label.Subsection == Subsection) {
Label.Sym->setFragment(F);
Label.Sym->setOffset(FOffset);
assert(Label.Sym->getOffset() == 0);
PendingLabels.erase(It--);
}
}
Expand All @@ -105,7 +104,7 @@ void MCSection::flushPendingLabels() {
MCFragment *F = new MCDataFragment();
addFragment(*F);
F->setParent(this);
flushPendingLabels(F, 0, Label.Subsection);
flushPendingLabels(F, Label.Subsection);
}
}

Expand Down
13 changes: 0 additions & 13 deletions llvm/lib/MC/MCWasmStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,6 @@ using namespace llvm;

MCWasmStreamer::~MCWasmStreamer() = default; // anchor.

void MCWasmStreamer::mergeFragment(MCDataFragment *DF, MCDataFragment *EF) {
flushPendingLabels(DF, DF->getContents().size());

for (unsigned I = 0, E = EF->getFixups().size(); I != E; ++I) {
EF->getFixups()[I].setOffset(EF->getFixups()[I].getOffset() +
DF->getContents().size());
DF->getFixups().push_back(EF->getFixups()[I]);
}
if (DF->getSubtargetInfo() == nullptr && EF->getSubtargetInfo())
DF->setHasInstructions(*EF->getSubtargetInfo());
DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
}

void MCWasmStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
auto *Symbol = cast<MCSymbolWasm>(S);
MCObjectStreamer::emitLabel(Symbol, Loc);
Expand Down
6 changes: 1 addition & 5 deletions llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s
# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mcpu=pentiumpro -mc-relax-all %s -o - \
# RUN: | llvm-objdump -d --no-show-raw-insn - \
# RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s
# RUN: | FileCheck --check-prefixes=CHECK,CHECK-OPT %s

.text
foo:
Expand All @@ -13,11 +13,7 @@ foo:
.bundle_lock align_to_end
# CHECK: 1: nopw %cs:(%eax,%eax)
# CHECK: 10: nopw %cs:(%eax,%eax)
# CHECK-RELAX: 1a: nop
# CHECK-RELAX: 20: nopw %cs:(%eax,%eax)
# CHECK-RELAX: 2a: nopw %cs:(%eax,%eax)
# CHECK-OPT: 1b: calll 0x1c
# CHECK-RELAX: 3b: calll 0x3c
calll bar # 5 bytes
.bundle_unlock
ret # 1 byte
7 changes: 1 addition & 6 deletions llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,20 @@
# RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s
# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mcpu=pentiumpro -mc-relax-all %s -o - \
# RUN: | llvm-objdump --no-print-imm-hex -d --no-show-raw-insn - \
# RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s
# RUN: | FileCheck --check-prefixes=CHECK,CHECK-OPT %s

.text
foo:
.bundle_align_mode 5
push %ebp # 1 byte
.align 16
# CHECK: 1: nopw %cs:(%eax,%eax)
# CHECK-RELAX: 10: nopw %cs:(%eax,%eax)
# CHECK-RELAX: 1a: nop
# CHECK-OPT: 10: movl $1, (%esp)
# CHECK-RELAX: 20: movl $1, (%esp)
movl $0x1, (%esp) # 7 bytes
movl $0x1, (%esp) # 7 bytes
# CHECK-OPT: 1e: nop
movl $0x2, 0x1(%esp) # 8 bytes
movl $0x2, 0x1(%esp) # 8 bytes
# CHECK-RELAX: 3e: nop
# CHECK-RELAX: 40: movl $2, 1(%esp)
movl $0x2, 0x1(%esp) # 8 bytes
movl $0x2, (%esp) # 7 bytes
# CHECK-OPT: 3f: nop
Expand Down

0 comments on commit b1932b8

Please sign in to comment.