Skip to content

Commit

Permalink
Recommit "[lldb] Don't dissasemble large functions by default"
Browse files Browse the repository at this point in the history
This recommits f665e80 which was reverted in 1cbd1b8 for breaking
TestFoundationDisassembly.py. The fix is to use --force in the test to avoid
bailing out on large functions.

I have also doubled the large function limit to 8000 bytes (~~ 2000 insns), as
the foundation library contains a lot of large-ish functions. The intent of this
feature is to prevent accidental disassembling of enormous (multi-megabyte)
"functions", not to get in people's way.

The original commit message follows:

If we have a binary without symbol information (and without
LC_FUNCTION_STARTS, if on a mac), then we have to resort to using
heuristics to determine the function boundaries. However, these don't
always work, and so we can easily end up thinking we have functions
which are several megabytes in size. Attempting to (accidentally)
disassemble these can take a very long time spam the terminal with
thousands of lines of disassembly.

This patch works around that problem by adding a sanity check to the
disassemble command. If we are about to disassemble a function which is
larger than a certain threshold, we will refuse to disassemble such a
function unless the user explicitly specifies the number of instructions
to disassemble, uses start/stop addresses for disassembly, or passes the
(new) --force argument.

The threshold is currently fairly aggressive (4000 bytes ~~ 1000
instructions). If needed, we can increase it, or even make it
configurable.

Differential Revision: https://reviews.llvm.org/D79789
  • Loading branch information
labath committed May 15, 2020
1 parent 969c63a commit 8b845ac
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 23 deletions.
58 changes: 47 additions & 11 deletions lldb/source/Commands/CommandObjectDisassemble.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@
#include "lldb/Target/StackFrame.h"
#include "lldb/Target/Target.h"

#define DEFAULT_DISASM_BYTE_SIZE 32
#define DEFAULT_DISASM_NUM_INS 4
static constexpr unsigned default_disasm_byte_size = 32;
static constexpr unsigned default_disasm_num_ins = 4;
static constexpr unsigned large_function_threshold = 8000;

using namespace lldb;
using namespace lldb_private;
Expand Down Expand Up @@ -143,6 +144,10 @@ Status CommandObjectDisassemble::CommandOptions::SetOptionValue(
}
} break;

case '\x01':
force = true;
break;

default:
llvm_unreachable("Unimplemented option");
}
Expand Down Expand Up @@ -186,6 +191,7 @@ void CommandObjectDisassemble::CommandOptions::OptionParsingStarting(

arch.Clear();
some_location_specified = false;
force = false;
}

Status CommandObjectDisassemble::CommandOptions::OptionParsingFinished(
Expand Down Expand Up @@ -214,6 +220,21 @@ CommandObjectDisassemble::CommandObjectDisassemble(

CommandObjectDisassemble::~CommandObjectDisassemble() = default;

llvm::Error CommandObjectDisassemble::CheckRangeSize(const AddressRange &range,
llvm::StringRef what) {
if (m_options.num_instructions > 0 || m_options.force ||
range.GetByteSize() < large_function_threshold)
return llvm::Error::success();
StreamString msg;
msg << "Not disassembling " << what << " because it is very large ";
range.Dump(&msg, &GetSelectedTarget(), Address::DumpStyleLoadAddress,
Address::DumpStyleFileAddress);
msg << ". To disassemble specify an instruction count limit, start/stop "
"addresses or use the --force option.";
return llvm::createStringError(llvm::inconvertibleErrorCode(),
msg.GetString());
}

llvm::Expected<std::vector<AddressRange>>
CommandObjectDisassemble::GetContainingAddressRanges() {
std::vector<AddressRange> ranges;
Expand Down Expand Up @@ -254,6 +275,9 @@ CommandObjectDisassemble::GetContainingAddressRanges() {
"Could not find function bounds for address 0x%" PRIx64,
m_options.symbol_containing_addr);
}

if (llvm::Error err = CheckRangeSize(ranges[0], "the function"))
return std::move(err);
return ranges;
}

Expand All @@ -273,8 +297,10 @@ CommandObjectDisassemble::GetCurrentFunctionRanges() {
else if (sc.symbol && sc.symbol->ValueIsAddress()) {
range = {sc.symbol->GetAddress(), sc.symbol->GetByteSize()};
} else
range = {frame->GetFrameCodeAddress(), DEFAULT_DISASM_BYTE_SIZE};
range = {frame->GetFrameCodeAddress(), default_disasm_byte_size};

if (llvm::Error err = CheckRangeSize(range, "the current function"))
return std::move(err);
return std::vector<AddressRange>{range};
}

Expand All @@ -298,7 +324,7 @@ CommandObjectDisassemble::GetCurrentLineRanges() {
}

llvm::Expected<std::vector<AddressRange>>
CommandObjectDisassemble::GetNameRanges() {
CommandObjectDisassemble::GetNameRanges(CommandReturnObject &result) {
ConstString name(m_options.func_name.c_str());
const bool include_symbols = true;
const bool include_inlines = true;
Expand All @@ -309,6 +335,7 @@ CommandObjectDisassemble::GetNameRanges() {
name, eFunctionNameTypeAuto, include_symbols, include_inlines, sc_list);

std::vector<AddressRange> ranges;
llvm::Error range_errs = llvm::Error::success();
AddressRange range;
const uint32_t scope =
eSymbolContextBlock | eSymbolContextFunction | eSymbolContextSymbol;
Expand All @@ -317,14 +344,21 @@ CommandObjectDisassemble::GetNameRanges() {
for (uint32_t range_idx = 0;
sc.GetAddressRange(scope, range_idx, use_inline_block_range, range);
++range_idx) {
ranges.push_back(range);
if (llvm::Error err = CheckRangeSize(range, "a range"))
range_errs = joinErrors(std::move(range_errs), std::move(err));
else
ranges.push_back(range);
}
}
if (ranges.empty()) {
if (range_errs)
return std::move(range_errs);
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"Unable to find symbol with name '%s'.\n",
name.GetCString());
}
if (range_errs)
result.AppendWarning(toString(std::move(range_errs)));
return ranges;
}

Expand All @@ -340,7 +374,7 @@ CommandObjectDisassemble::GetPCRanges() {
if (m_options.num_instructions == 0) {
// Disassembling at the PC always disassembles some number of
// instructions (not the whole function).
m_options.num_instructions = DEFAULT_DISASM_NUM_INS;
m_options.num_instructions = default_disasm_num_ins;
}
return std::vector<AddressRange>{{frame->GetFrameCodeAddress(), 0}};
}
Expand All @@ -359,15 +393,16 @@ CommandObjectDisassemble::GetStartEndAddressRanges() {
}

llvm::Expected<std::vector<AddressRange>>
CommandObjectDisassemble::GetRangesForSelectedMode() {
CommandObjectDisassemble::GetRangesForSelectedMode(
CommandReturnObject &result) {
if (m_options.symbol_containing_addr != LLDB_INVALID_ADDRESS)
return CommandObjectDisassemble::GetContainingAddressRanges();
if (m_options.current_function)
return CommandObjectDisassemble::GetCurrentFunctionRanges();
if (m_options.frame_line)
return CommandObjectDisassemble::GetCurrentLineRanges();
if (!m_options.func_name.empty())
return CommandObjectDisassemble::GetNameRanges();
return CommandObjectDisassemble::GetNameRanges(result);
if (m_options.start_addr != LLDB_INVALID_ADDRESS)
return CommandObjectDisassemble::GetStartEndAddressRanges();
return CommandObjectDisassemble::GetPCRanges();
Expand Down Expand Up @@ -440,7 +475,8 @@ bool CommandObjectDisassemble::DoExecute(Args &command,
if (m_options.raw)
options |= Disassembler::eOptionRawOuput;

llvm::Expected<std::vector<AddressRange>> ranges = GetRangesForSelectedMode();
llvm::Expected<std::vector<AddressRange>> ranges =
GetRangesForSelectedMode(result);
if (!ranges) {
result.AppendError(toString(ranges.takeError()));
result.SetStatus(eReturnStatusFailed);
Expand All @@ -453,7 +489,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command,
if (m_options.num_instructions == 0) {
limit = {Disassembler::Limit::Bytes, cur_range.GetByteSize()};
if (limit.value == 0)
limit.value = DEFAULT_DISASM_BYTE_SIZE;
limit.value = default_disasm_byte_size;
} else {
limit = {Disassembler::Limit::Instructions, m_options.num_instructions};
}
Expand All @@ -476,7 +512,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command,
result.SetStatus(eReturnStatusFailed);
}
if (print_sc_header)
result.AppendMessage("\n");
result.GetOutputStream() << "\n";
}

return result.Succeeded();
Expand Down
9 changes: 7 additions & 2 deletions lldb/source/Commands/CommandObjectDisassemble.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class CommandObjectDisassemble : public CommandObjectParsed {
// "at_pc". This should be set
// in SetOptionValue if anything the selects a location is set.
lldb::addr_t symbol_containing_addr;
bool force = false;
};

CommandObjectDisassemble(CommandInterpreter &interpreter);
Expand All @@ -73,15 +74,19 @@ class CommandObjectDisassemble : public CommandObjectParsed {
protected:
bool DoExecute(Args &command, CommandReturnObject &result) override;

llvm::Expected<std::vector<AddressRange>> GetRangesForSelectedMode();
llvm::Expected<std::vector<AddressRange>>
GetRangesForSelectedMode(CommandReturnObject &result);

llvm::Expected<std::vector<AddressRange>> GetContainingAddressRanges();
llvm::Expected<std::vector<AddressRange>> GetCurrentFunctionRanges();
llvm::Expected<std::vector<AddressRange>> GetCurrentLineRanges();
llvm::Expected<std::vector<AddressRange>> GetNameRanges();
llvm::Expected<std::vector<AddressRange>>
GetNameRanges(CommandReturnObject &result);
llvm::Expected<std::vector<AddressRange>> GetPCRanges();
llvm::Expected<std::vector<AddressRange>> GetStartEndAddressRanges();

llvm::Error CheckRangeSize(const AddressRange &range, llvm::StringRef what);

CommandOptions m_options;
};

Expand Down
4 changes: 3 additions & 1 deletion lldb/source/Commands/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ let Command = "disassemble" in {
Desc<"Address at which to start disassembling.">;
def disassemble_options_end_address : Option<"end-address", "e">, Group<1>,
Arg<"AddressOrExpression">, Desc<"Address at which to end disassembling.">;
def disassemble_options_count : Option<"count", "c">, Groups<[2,3,4,5]>,
def disassemble_options_count : Option<"count", "c">, Groups<[2,3,4,5,7]>,
Arg<"NumLines">, Desc<"Number of instructions to display.">;
def disassemble_options_name : Option<"name", "n">, Group<3>,
Arg<"FunctionName">, Completion<"Symbol">,
Expand All @@ -326,6 +326,8 @@ let Command = "disassemble" in {
def disassemble_options_address : Option<"address", "a">, Group<7>,
Arg<"AddressOrExpression">,
Desc<"Disassemble function containing this address.">;
def disassemble_options_force : Option<"force", "\\x01">, Groups<[2,3,4,5,7]>,
Desc<"Force dissasembly of large functions.">;
}

let Command = "expression" in {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_foundation_disasm(self):
if match:
func = match.group(1)
self.runCmd('image lookup -s "%s"' % func)
self.runCmd('disassemble -n "%s"' % func)
self.runCmd('disassemble --force -n "%s"' % func)

@skipIfAsan
def test_simple_disasm(self):
Expand Down
10 changes: 8 additions & 2 deletions lldb/test/Shell/Commands/Inputs/command-disassemble.lldbinit
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@ disassemble --start-address 0x0
disassemble --start-address 0x4 --end-address 0x8
disassemble --start-address 0x8 --end-address 0x4
disassemble --address 0x0
disassemble --address 0xdead
disassemble --address 0xdeadb
disassemble --address 0x100
disassemble --address 0x100 --count 3
disassemble --address 0x100 --force
disassemble --start-address 0x0 --count 7
disassemble --start-address 0x0 --end-address 0x20 --count 7
disassemble --address 0x0 --count 7
disassemble --name case1
disassemble --name case2
disassemble --name case3
disassemble --name case3 --count 3
11 changes: 9 additions & 2 deletions lldb/test/Shell/Commands/command-disassemble-process.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
# REQUIRES: x86

# RUN: yaml2obj --docnum=1 %s > %T/command-disassemble-process.exe
# RUN: yaml2obj --docnum=1 -DMAIN_SIZE=8 %s > %T/command-disassemble-process.exe
# RUN: yaml2obj --docnum=1 -DMAIN_SIZE=4000 %s > %T/command-disassemble-process.big.exe
# RUN: yaml2obj --docnum=2 %s > %t

# RUN: %lldb -c %t %T/command-disassemble-process.exe \
# RUN: -o "settings set interpreter.stop-command-source-on-error false" \
# RUN: -s %S/Inputs/command-disassemble-process.lldbinit -o exit 2>&1 \
# RUN: | FileCheck %s

# RUN: %lldb -c %t %T/command-disassemble-process.big.exe \
# RUN: -o disassemble -o exit 2>&1 | FileCheck %s --check-prefix=BIG

# CHECK: (lldb) disassemble
# CHECK-NEXT: command-disassemble-process.exe`main:
# CHECK-NEXT: 0x4002 <+0>: addb %al, (%rcx)
Expand Down Expand Up @@ -59,6 +63,8 @@
# CHECK-NEXT: 0x400e: addb %cl, (%rcx)
# CHECK-NEXT: 0x4010: addb %cl, (%rdx)

# BIG: error: Not disassembling the current function because it is very large [0x0000000000004002-0x0000000000004fa2). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.

--- !ELF
FileHeader:
Class: ELFCLASS64
Expand All @@ -72,6 +78,7 @@ Sections:
Address: 0x0000000000004000
AddressAlign: 0x0000000000001000
Content: 00000001000200030006000700080009000A000B000E000F00100011001200130016001700180019001A001B001E001F00200021002200230026002700280029002A002B002E002F
Size: 0x10000
- Name: .note.gnu.build-id
Type: SHT_NOTE
Flags: [ SHF_ALLOC ]
Expand All @@ -83,7 +90,7 @@ Symbols:
Type: STT_FUNC
Section: .text
Value: 0x0000000000004002
Size: 0x0000000000000008
Size: [[MAIN_SIZE]]
ProgramHeaders:
- Type: PT_LOAD
Flags: [ PF_X, PF_R ]
Expand Down
72 changes: 68 additions & 4 deletions lldb/test/Shell/Commands/command-disassemble.s
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,19 @@
# CHECK-NEXT: command-disassemble.s.tmp[0x8] <+8>: int $0x14
# CHECK-NEXT: command-disassemble.s.tmp[0xa] <+10>: int $0x15
# CHECK-NEXT: command-disassemble.s.tmp[0xc] <+12>: int $0x16
# CHECK-NEXT: (lldb) disassemble --address 0xdead
# CHECK-NEXT: error: Could not find function bounds for address 0xdead
# CHECK-NEXT: (lldb) disassemble --address 0xdeadb
# CHECK-NEXT: error: Could not find function bounds for address 0xdeadb
# CHECK-NEXT: (lldb) disassemble --address 0x100
# CHECK-NEXT: error: Not disassembling the function because it is very large [0x0000000000000040-0x0000000000002040). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
# CHECK-NEXT: (lldb) disassemble --address 0x100 --count 3
# CHECK-NEXT: command-disassemble.s.tmp`very_long:
# CHECK-NEXT: command-disassemble.s.tmp[0x40] <+0>: int $0x2a
# CHECK-NEXT: command-disassemble.s.tmp[0x42] <+2>: int $0x2a
# CHECK-NEXT: command-disassemble.s.tmp[0x44] <+4>: int $0x2a
# CHECK-NEXT: (lldb) disassemble --address 0x100 --force
# CHECK-NEXT: command-disassemble.s.tmp`very_long:
# CHECK-NEXT: command-disassemble.s.tmp[0x40] <+0>: int $0x2a
# CHECK: command-disassemble.s.tmp[0x203e] <+8190>: int $0x2a
# CHECK-NEXT: (lldb) disassemble --start-address 0x0 --count 7
# CHECK-NEXT: command-disassemble.s.tmp`foo:
# CHECK-NEXT: command-disassemble.s.tmp[0x0] <+0>: int $0x10
Expand All @@ -64,8 +75,32 @@
# CHECK-NEXT: command-disassemble.s.tmp[0xc] <+12>: int $0x16
# CHECK-NEXT: (lldb) disassemble --start-address 0x0 --end-address 0x20 --count 7
# CHECK-NEXT: error: invalid combination of options for the given command
# CHECK-NEXT: (lldb) disassemble --address 0x0 --count 7
# CHECK-NEXT: error: invalid combination of options for the given command
# CHECK-NEXT: (lldb) disassemble --name case1
# CHECK-NEXT: command-disassemble.s.tmp`n1::case1:
# CHECK-NEXT: command-disassemble.s.tmp[0x2040] <+0>: int $0x30
# CHECK-EMPTY:
# CHECK-NEXT: command-disassemble.s.tmp`n2::case1:
# CHECK-NEXT: command-disassemble.s.tmp[0x2042] <+0>: int $0x31
# CHECK-EMPTY:
# CHECK-NEXT: (lldb) disassemble --name case2
# CHECK-NEXT: command-disassemble.s.tmp`n1::case2:
# CHECK-NEXT: command-disassemble.s.tmp[0x2044] <+0>: int $0x32
# CHECK-NEXT: warning: Not disassembling a range because it is very large [0x0000000000002046-0x0000000000004046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
# CHECK-NEXT: (lldb) disassemble --name case3
# CHECK-NEXT: error: Not disassembling a range because it is very large [0x0000000000004046-0x0000000000006046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
# CHECK-NEXT: Not disassembling a range because it is very large [0x0000000000006046-0x0000000000008046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
# CHECK-NEXT: (lldb) disassemble --name case3 --count 3
# CHECK-NEXT: command-disassemble.s.tmp`n1::case3:
# CHECK-NEXT: command-disassemble.s.tmp[0x4046] <+0>: int $0x2a
# CHECK-NEXT: command-disassemble.s.tmp[0x4048] <+2>: int $0x2a
# CHECK-NEXT: command-disassemble.s.tmp[0x404a] <+4>: int $0x2a
# CHECK-EMPTY:
# CHECK-NEXT: command-disassemble.s.tmp`n2::case3:
# CHECK-NEXT: command-disassemble.s.tmp[0x6046] <+0>: int $0x2a
# CHECK-NEXT: command-disassemble.s.tmp[0x6048] <+2>: int $0x2a
# CHECK-NEXT: command-disassemble.s.tmp[0x604a] <+4>: int $0x2a
# CHECK-EMPTY:


.text
foo:
Expand Down Expand Up @@ -102,3 +137,32 @@ bar:
int $0x2d
int $0x2e
int $0x2f

very_long:
.rept 0x1000
int $42
.endr

_ZN2n15case1Ev:
int $0x30

_ZN2n25case1Ev:
int $0x31

_ZN2n15case2Ev:
int $0x32

_ZN2n25case2Ev:
.rept 0x1000
int $42
.endr

_ZN2n15case3Ev:
.rept 0x1000
int $42
.endr

_ZN2n25case3Ev:
.rept 0x1000
int $42
.endr

0 comments on commit 8b845ac

Please sign in to comment.