Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: improve debug info code #1465

Merged
merged 1 commit into from
Mar 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 28 additions & 7 deletions src/fuzz_introspector/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,18 +110,28 @@ def load_data_files(self, parallelise=True, correlation_file=None):
self.coverage_url, self.base_folder)
# Load all debug files
self.debug_files = data_loader.load_all_debug_files(self.base_folder)

# Find all relevant debug information yaml files.
self.debug_type_files = data_loader.find_all_debug_all_types_files(
self.base_folder)
self.debug_function_files = data_loader.find_all_debug_function_files(
self.base_folder)

def load_debug_report(self):
"""Load and digest debug information."""
self.debug_report = debug_info.load_debug_report(self.debug_files)

# Load the yaml content of debug files holding type information and
# function information.
self.debug_all_types = debug_info.load_debug_all_yaml_files(
self.debug_type_files)
self.debug_all_functions = debug_info.load_debug_all_yaml_files(
self.debug_function_files)

# Index the functions based on file locations. This is useful for
# quickly looking up debug function details based on their file
# locations, which we can get from the function data collected by
# the LLVM module.
tmp_debug_functions = dict()
no_path_debug_funcs = list()
for func in self.debug_all_functions:
Expand All @@ -135,7 +145,7 @@ def load_debug_report(self):

# Extract the raw function signature. This propagates types into all of
# the debug functions.
debug_info.clean_extract_raw_all_debugged_function_signatures(
debug_info.correlate_debugged_function_to_debug_types(
self.debug_all_types, self.debug_all_functions)

def dump_debug_report(self):
Expand Down Expand Up @@ -837,9 +847,12 @@ def convert_param_list_to_str_v2(param_list):
return raw_sig.strip()


def correlate_introspector_func_to_debug_information_v2(
if_func, all_debug_functions, debug_dict_by_name,
debug_dict_by_filename):
def correlate_introspector_func_to_debug_information(if_func,
all_debug_functions,
debug_dict_by_name,
debug_dict_by_filename):
"""Correlate a single LLVM-based function to a given function in the
collected debug information."""
# Check if name matches. If so, this one is easy.
same_name_dfs = debug_dict_by_name.get(if_func['Func name'], [])

Expand Down Expand Up @@ -888,20 +901,28 @@ def correlate_introspector_func_to_debug_information_v2(
return None, None


def correlate_introspection_functions_to_debug_info_v2(
all_functions_json_report, debug_all_functions):
def correlate_introspection_functions_to_debug_info(all_functions_json_report,
debug_all_functions):
"""Correlates function data collected by debug information to function
data collected by the LLVM module, and uses the correlated data to generate
function signatures for each function based on debug information."""

# A lot of look-ups are needed when matching LLVM functions to debug
# functions. Start with creating two indexes to make these look-ups
# faster.
debug_dict_by_name = dict()
debug_dict_by_filename = dict()
for df in debug_all_functions:
# Normalize the source file
df['source']['source_file'] = os.path.normpath(df['source'].get(
'source_file', ''))

# Append debug function to name-index.
entry_list1 = debug_dict_by_name.get(df.get('name', ''), [])
entry_list1.append(df)
debug_dict_by_name[df.get('name', '')] = entry_list1

# Append debug function to file-index.
entry_list2 = debug_dict_by_filename.get(
df['source'].get('source_file', ''), [])
entry_list2.append(df)
Expand All @@ -912,7 +933,7 @@ def correlate_introspection_functions_to_debug_info_v2(
print("%s ------- %d" % (dl3, len(debug_dict_by_filename[dl3])))

for if_func in all_functions_json_report:
func_sig, correlated_debug_function = correlate_introspector_func_to_debug_information_v2(
func_sig, correlated_debug_function = correlate_introspector_func_to_debug_information(
if_func, debug_all_functions, debug_dict_by_name,
debug_dict_by_filename)

Expand Down
25 changes: 16 additions & 9 deletions src/fuzz_introspector/debug_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,9 @@ def load_debug_all_yaml_files(debug_all_types_files):
return elem_list


def extract_func_sig_friendly_type_tags(target_type, all_debug_types,
debug_type_dictionary):
def extract_func_sig_friendly_type_tags(target_type, debug_type_dictionary):
"""Recursively iterates atomic type elements to construct a friendly
string representing the type."""
if int(target_type) == 0:
return ['void']

Expand Down Expand Up @@ -383,11 +384,11 @@ def extract_func_sig_friendly_type_tags(target_type, all_debug_types,
return tags


def extract_debugged_function_signature(dfunc, all_debug_types,
debug_type_dictionary):
def extract_debugged_function_signature(dfunc, debug_type_dictionary):
"""Extract the raw types used by a function."""
try:
return_type = extract_func_sig_friendly_type_tags(
dfunc['type_arguments'][0], all_debug_types, debug_type_dictionary)
dfunc['type_arguments'][0], debug_type_dictionary)
except IndexError:
return_type = 'N/A'
params = []
Expand All @@ -396,7 +397,6 @@ def extract_debugged_function_signature(dfunc, all_debug_types,
for i in range(1, len(dfunc['type_arguments'])):
params.append(
extract_func_sig_friendly_type_tags(dfunc['type_arguments'][i],
all_debug_types,
debug_type_dictionary))

source_file = dfunc['file_location'].split(":")[0]
Expand All @@ -414,9 +414,16 @@ def extract_debugged_function_signature(dfunc, all_debug_types,
return function_signature_elements, source_location


def clean_extract_raw_all_debugged_function_signatures(all_debug_types,
all_debug_functions):
def correlate_debugged_function_to_debug_types(all_debug_types,
all_debug_functions):
"""Correlate debug information about all functions and all types. The
result is that the atomic types extracted from debug information are
correlated to the debug functions."""
print("Correlating")

# Index debug types by address. We need to do a lot of look-ups when
# refining data types where the address is the key, so a fast
# look-up mechanism is useful here.
debug_type_dictionary = dict()
for debug_type in all_debug_types:
debug_type_dictionary[int(debug_type['addr'])] = debug_type
Expand All @@ -426,7 +433,7 @@ def clean_extract_raw_all_debugged_function_signatures(all_debug_types,
logger.info("idx: %d" % (idx))
idx += 1
func_signature_elems, source_location = extract_debugged_function_signature(
dfunc, all_debug_types, debug_type_dictionary)
dfunc, debug_type_dictionary)

dfunc['func_signature_elems'] = func_signature_elems
dfunc['source'] = source_location
Expand Down
2 changes: 1 addition & 1 deletion src/fuzz_introspector/html_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ def create_html_report(introspection_proj: analysis.IntrospectionProject,
introspection_proj.load_debug_report()

# Correlate debug info to introspector functions
analysis.correlate_introspection_functions_to_debug_info_v2(
analysis.correlate_introspection_functions_to_debug_info(
all_functions_json_report, introspection_proj.debug_all_functions)

# Write various stats and all-functions data to summary.json
Expand Down
Loading