From 2f54f678ca204dbb14c29987d22e6f6a3d8c0ff9 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Fri, 17 Jan 2025 20:11:34 +0000 Subject: [PATCH 01/13] analysis: Change analysis interface to allow passing in properties Signed-off-by: Arthur Chan --- src/fuzz_introspector/analyses/__init__.py | 2 + .../analyses/source_code_line_analyser.py | 90 +++++++++++++++++++ src/fuzz_introspector/analysis.py | 13 ++- src/fuzz_introspector/cli.py | 46 ++++++++-- src/fuzz_introspector/commands.py | 19 ++-- .../frontends/frontend_jvm.py | 3 + src/fuzz_introspector/html_report.py | 9 +- 7 files changed, 164 insertions(+), 18 deletions(-) create mode 100644 src/fuzz_introspector/analyses/source_code_line_analyser.py diff --git a/src/fuzz_introspector/analyses/__init__.py b/src/fuzz_introspector/analyses/__init__.py index 2c1ded4f4..7c65cf9e5 100644 --- a/src/fuzz_introspector/analyses/__init__.py +++ b/src/fuzz_introspector/analyses/__init__.py @@ -8,6 +8,7 @@ from fuzz_introspector.analyses import runtime_coverage_analysis from fuzz_introspector.analyses import sinks_analyser from fuzz_introspector.analyses import annotated_cfg +from fuzz_introspector.analyses import source_code_line_analyser # All optional analyses. # Ordering here is important as top analysis will be shown first in the report @@ -22,4 +23,5 @@ metadata.MetadataAnalysis, sinks_analyser.SinkCoverageAnalyser, annotated_cfg.FuzzAnnotatedCFG, + source_code_line_analyser.SourceCodeLineFunctionAnalyser, ] diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py new file mode 100644 index 000000000..77763cc73 --- /dev/null +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -0,0 +1,90 @@ +# Copyright 2025 Fuzz Introspector Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Analysis plugin for introspection of the function on target line in target source file.""" + +import json +import logging + +from bs4 import BeautifulSoup as bs + +from typing import (Any, List, Tuple, Dict, Optional) + +from fuzz_introspector import (analysis, code_coverage, constants, cfg_load, + html_helpers, json_report, utils) + +from fuzz_introspector.datatypes import (project_profile, fuzzer_profile, + function_profile) + +logger = logging.getLogger(name=__name__) + + +class SourceCodeLineFunctionAnalyser(analysis.AnalysisInterface): + name: str = 'SourceCodeLineFunctionAnalyser' + + def __init__(self): + self.json_results: Dict[str, Any] = dict() + self.json_string_result = '' + + @classmethod + def get_name(cls): + """Return the analyser identifying name for processing. + + :return: The identifying name of this analyser + :rtype: str + """ + return cls.name + + def get_json_string_result(self) -> str: + """Return the stored json string result. + + :return: The json string result processed and stored + by this analyser + :rtype: str + """ + if self.json_string_result: + return self.json_string_result + return json.dumps(self.json_results) + + def set_json_string_result(self, json_string): + """Store the result of this analyser as json string result + for further processing in a later time. + + :param json_string: A json string variable storing the + processing result of the analyser for future use + :type json_string: str + """ + self.json_string_result = json_string + + def analysis_func(self, + table_of_contents: html_helpers.HtmlTableOfContents, + tables: List[str], + proj_profile: project_profile.MergedProjectProfile, + profiles: List[fuzzer_profile.FuzzerProfile], + basefolder: str, coverage_url: str, + conclusions: List[html_helpers.HTMLConclusion], + out_dir) -> str: + logger.info(f" - Running analysis {self.get_name()}") + logger.info(self.properties) + # Get all functions from the profiles + all_functions = list(proj_profile.all_functions.values()) + all_functions.extend(proj_profile.all_constructors.values()) + + # Generate a Source File to Function Profile map and store in JSON Result + for function in all_functions: + func_list = self.json_results.get(function.function_source_file, + []) + func_list.append(function) + self.json_results[function.function_source_file] = func_list + + return '' diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py index d60674f01..e68beb6c9 100644 --- a/src/fuzz_introspector/analysis.py +++ b/src/fuzz_introspector/analysis.py @@ -172,6 +172,10 @@ class AnalysisInterface(abc.ABC): json_string_result: str = "" display_html: bool = False + def set_additional_properties(self, properties: dict[str, str]): + """Allow setting additional properties for this analysis.""" + self.properties = properties + @abc.abstractmethod def analysis_func(self, table_of_contents: html_helpers.HtmlTableOfContents, @@ -230,9 +234,12 @@ def set_display_html(self, is_display_html): self.display_html = is_display_html -def instantiate_analysis_interface(cls: Type[AnalysisInterface]): +def instantiate_analysis_interface(cls: Type[AnalysisInterface], + props: dict[str, str]): """Wrapper function to satisfy Mypy semantics""" - return cls() + analysis_interface = cls() + analysis_interface.set_additional_properties(props) + return analysis_interface class FuzzBranchBlocker: @@ -286,7 +293,7 @@ def get_node_coverage_hitcount(demangled_name: str, callstack: Dict[int, str], if is_first: # As this is the first node ensure it is indeed the entrypoint. # The difference is this node has node "parent" or prior nodes. - + logger.info(demangled_name) if not profile.func_is_entrypoint(demangled_name): raise AnalysisError( "First node in calltree is non-fuzzer function") diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py index d86a8d414..ef4546e51 100644 --- a/src/fuzz_introspector/cli.py +++ b/src/fuzz_introspector/cli.py @@ -1,4 +1,3 @@ -# Copyright 2024 Fuzz Introspector Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -46,6 +45,21 @@ def get_cmdline_parser() -> argparse.ArgumentParser: full_parser.add_argument('--out-dir', default='') full_parser.add_argument('--name', default='no-name') full_parser.add_argument('--coverage_url', default='') + full_parser.add_argument('--analyses', + nargs='+', + default=[], + help=''' + Analyses to run. Available options: + AnnotatedCFG, BugDigestorAnalysis, FuzzCalltreeAnalysis, + FuzzDriverSynthesizerAnalysis, FuzzEngineInputAnalysis, + FilePathAnalyser, ThirdPartyAPICoverageAnalyser, + MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, + SinkCoverageAnalyser, FunctionSourceLineAnalyser + ''') + full_parser.add_argument('--properties', + nargs='*', + default=[], + help='Additional properties for analysis') # Report generation command report_parser = subparsers.add_parser( @@ -70,7 +84,11 @@ def get_cmdline_parser() -> argparse.ArgumentParser: ], help=""" Analyses to run. Available options: - OptimalTargets, FuzzEngineInput, ThirdPartyAPICoverageAnalyser + AnnotatedCFG, BugDigestorAnalysis, FuzzCalltreeAnalysis, + FuzzDriverSynthesizerAnalysis, FuzzEngineInputAnalysis, + FilePathAnalyser, ThirdPartyAPICoverageAnalyser, + MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, + SinkCoverageAnalyser, FunctionSourceLineAnalyser """) report_parser.add_argument("--enable-all-analyses", action='store_true', @@ -93,6 +111,10 @@ def get_cmdline_parser() -> argparse.ArgumentParser: nargs="+", default=["FuzzEngineInputAnalysis"], help="State which analysis requires separate json report output") + report_parser.add_argument('--properties', + nargs='*', + default=[], + help='Additional properties for analysis') # Command for correlating binary files to fuzzerLog files correlate_parser = subparsers.add_parser( @@ -151,10 +173,21 @@ def main() -> int: logger.info("Running fuzz introspector post-processing") if args.command == 'report': - return_code = commands.run_analysis_on_dir( - args.target_dir, args.coverage_url, args.analyses, - args.correlation_file, args.enable_all_analyses, args.name, - args.language, args.output_json) + props: dict[str, str] = {} + for property in args.properties: + if property.count('=') == 1: + key, value = property.split('=', 1) + props[key] = value + + return_code = commands.run_analysis_on_dir(args.target_dir, + args.coverage_url, + args.analyses, + args.correlation_file, + args.enable_all_analyses, + args.name, + args.language, + args.output_json, + props=props) logger.info("Ending fuzz introspector report generation") elif args.command == 'correlate': return_code = commands.correlate_binaries_to_logs(args.binaries_dir) @@ -172,3 +205,4 @@ def main() -> int: if __name__ == "__main__": main() +"--enable-all-analyses", diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py index d431b2129..1c3db16fa 100644 --- a/src/fuzz_introspector/commands.py +++ b/src/fuzz_introspector/commands.py @@ -69,14 +69,21 @@ def end_to_end(args) -> int: else: language = args.language + props: dict[str, str] = {} + for property in args.properties: + if property.count('=') == 1: + key, value = property.split('=', 1) + props[key] = value + return run_analysis_on_dir(target_folder=out_dir, coverage_url=args.coverage_url, - analyses_to_run=[], + analyses_to_run=args.analyses, correlation_file='', - enable_all_analyses=True, + enable_all_analyses=(not args.analyses), report_name=args.name, language=language, - out_dir=out_dir) + out_dir=out_dir, + props=props) def run_analysis_on_dir(target_folder: str, @@ -89,7 +96,8 @@ def run_analysis_on_dir(target_folder: str, output_json: Optional[List[str]] = None, parallelise: bool = True, dump_files: bool = True, - out_dir: str = '') -> int: + out_dir: str = '', + props: dict[str, str] = {}) -> int: """Runs Fuzz Introspector analysis from based on the results from a frontend run. The primary task is to aggregate the data and generate a HTML report.""" @@ -114,7 +122,8 @@ def run_analysis_on_dir(target_folder: str, output_json, report_name, dump_files, - out_dir=out_dir) + out_dir=out_dir, + props=props) return constants.APP_EXIT_SUCCESS diff --git a/src/fuzz_introspector/frontends/frontend_jvm.py b/src/fuzz_introspector/frontends/frontend_jvm.py index 36ad2e995..0e34942e6 100644 --- a/src/fuzz_introspector/frontends/frontend_jvm.py +++ b/src/fuzz_introspector/frontends/frontend_jvm.py @@ -1191,6 +1191,9 @@ def extract_calltree(self, if not visited_functions: visited_functions = set() + if function and '].' not in function: + function = None + if not source_code and function: source_code = self.find_source_with_method(function) diff --git a/src/fuzz_introspector/html_report.py b/src/fuzz_introspector/html_report.py index 140ec5a4a..ebef0c09f 100644 --- a/src/fuzz_introspector/html_report.py +++ b/src/fuzz_introspector/html_report.py @@ -643,7 +643,7 @@ def create_section_all_functions(table_of_contents, tables, proj_profile, def create_section_optional_analyses( table_of_contents, analyses_to_run, output_json, tables, introspection_proj: analysis.IntrospectionProject, basefolder, - coverage_url, conclusions, dump_files, out_dir) -> str: + coverage_url, conclusions, dump_files, out_dir, props) -> str: """Creates the HTML sections containing optional analyses.""" html_report_core = "" logger.info(" - Handling optional analyses") @@ -662,7 +662,7 @@ def create_section_optional_analyses( analysis_name = analysis_interface.get_name() if analysis_name in combined_analyses: analysis_instance = analysis.instantiate_analysis_interface( - analysis_interface) + analysis_interface, props) analysis_instance.dump_files = dump_files # Set display_html flag for the analysis_instance @@ -727,7 +727,8 @@ def create_html_report(introspection_proj: analysis.IntrospectionProject, output_json, report_name, dump_files, - out_dir: str = '') -> None: + out_dir: str = '', + props: dict[str, str] = {}) -> None: """ Logs a complete report. This is the current main place for looking at data produced by fuzz introspector. @@ -783,7 +784,7 @@ def create_html_report(introspection_proj: analysis.IntrospectionProject, table_of_contents, analyses_to_run, output_json, tables, introspection_proj, introspection_proj.proj_profile.basefolder, introspection_proj.proj_profile.coverage_url, conclusions, dump_files, - out_dir) + out_dir, props) # Create HTML showing the conclusions at the top of the report. html_report_top += html_helpers.create_conclusions_box(conclusions) From 40875222cd1551484ef68ce70dac52257bf131c7 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Fri, 17 Jan 2025 20:15:37 +0000 Subject: [PATCH 02/13] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py index e68beb6c9..f51597245 100644 --- a/src/fuzz_introspector/analysis.py +++ b/src/fuzz_introspector/analysis.py @@ -293,7 +293,7 @@ def get_node_coverage_hitcount(demangled_name: str, callstack: Dict[int, str], if is_first: # As this is the first node ensure it is indeed the entrypoint. # The difference is this node has node "parent" or prior nodes. - logger.info(demangled_name) + if not profile.func_is_entrypoint(demangled_name): raise AnalysisError( "First node in calltree is non-fuzzer function") From 8afcd9748ca9d4b7a6c54e621689972354f43134 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Fri, 17 Jan 2025 21:54:51 +0000 Subject: [PATCH 03/13] Fix formatting Signed-off-by: Arthur Chan --- .../analyses/source_code_line_analyser.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py index 77763cc73..397f3f553 100644 --- a/src/fuzz_introspector/analyses/source_code_line_analyser.py +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -16,15 +16,11 @@ import json import logging -from bs4 import BeautifulSoup as bs +from typing import (Any, List, Dict) -from typing import (Any, List, Tuple, Dict, Optional) +from fuzz_introspector import (analysis, html_helpers) -from fuzz_introspector import (analysis, code_coverage, constants, cfg_load, - html_helpers, json_report, utils) - -from fuzz_introspector.datatypes import (project_profile, fuzzer_profile, - function_profile) +from fuzz_introspector.datatypes import (project_profile, fuzzer_profile) logger = logging.getLogger(name=__name__) From 8587f60d08e758e29886c5c09e5f9e6c4b80ee7b Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 13:41:42 +0000 Subject: [PATCH 04/13] Fix logic Signed-off-by: Arthur Chan --- src/fuzz_introspector/analyses/__init__.py | 2 +- .../analyses/source_code_line_analyser.py | 45 ++++++++++++++++--- src/fuzz_introspector/analysis.py | 5 ++- src/fuzz_introspector/cli.py | 39 +++++++++------- src/fuzz_introspector/commands.py | 19 ++++---- .../datatypes/function_profile.py | 4 ++ .../frontends/frontend_jvm.py | 4 ++ src/fuzz_introspector/html_report.py | 3 +- 8 files changed, 87 insertions(+), 34 deletions(-) diff --git a/src/fuzz_introspector/analyses/__init__.py b/src/fuzz_introspector/analyses/__init__.py index 7c65cf9e5..50c019075 100644 --- a/src/fuzz_introspector/analyses/__init__.py +++ b/src/fuzz_introspector/analyses/__init__.py @@ -23,5 +23,5 @@ metadata.MetadataAnalysis, sinks_analyser.SinkCoverageAnalyser, annotated_cfg.FuzzAnnotatedCFG, - source_code_line_analyser.SourceCodeLineFunctionAnalyser, + source_code_line_analyser.SourceCodeLineAnalyser, ] diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py index 397f3f553..43ee16cd1 100644 --- a/src/fuzz_introspector/analyses/source_code_line_analyser.py +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -13,6 +13,7 @@ # limitations under the License. """Analysis plugin for introspection of the function on target line in target source file.""" +import os import json import logging @@ -25,8 +26,8 @@ logger = logging.getLogger(name=__name__) -class SourceCodeLineFunctionAnalyser(analysis.AnalysisInterface): - name: str = 'SourceCodeLineFunctionAnalyser' +class SourceCodeLineAnalyser(analysis.AnalysisInterface): + name: str = 'SourceCodeLineAnalyser' def __init__(self): self.json_results: Dict[str, Any] = dict() @@ -70,17 +71,49 @@ def analysis_func(self, basefolder: str, coverage_url: str, conclusions: List[html_helpers.HTMLConclusion], out_dir) -> str: - logger.info(f" - Running analysis {self.get_name()}") - logger.info(self.properties) + logger.info(f' - Running analysis {self.get_name()}') + + # Get target source file and line + target_source = self.properties.get('source_file') + target_line = self.properties.get('line') + + if not target_source or not isinstance(target_line, int) or target_line <= 0: + logger.error('No valid source code or target line are provided') + return '' + # Get all functions from the profiles all_functions = list(proj_profile.all_functions.values()) all_functions.extend(proj_profile.all_constructors.values()) # Generate a Source File to Function Profile map and store in JSON Result + func_file_map = {} for function in all_functions: - func_list = self.json_results.get(function.function_source_file, + func_list = func_file_map.get(function.function_source_file_path, []) func_list.append(function) - self.json_results[function.function_source_file] = func_list + func_file_map[function.function_source_file_path] = func_list + + if os.sep in target_source: + # File path + target_func_list = func_file_map.get(target_source, []) + else: + # File name + target_func_list = [] + for key, value in func_file_map.items(): + if os.path.basename(key) == target_source: + target_func_list.extend(value) + + if not target_func_list: + logger.error('Failed to locate the target source file ' + f'{target_source} from the project.') + + result_list = [] + for func in target_func_list: + if func.function_linenumber <= target_line <= func.function_line_number_end: + logger.info(f'Found function {func.function_name} from line {target_line} in {target_source}') + result_list.append(func) + + if result_list: + self.json_results['functions'] = result_list return '' diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py index f51597245..10fc3011c 100644 --- a/src/fuzz_introspector/analysis.py +++ b/src/fuzz_introspector/analysis.py @@ -24,6 +24,7 @@ List, Type, Set, + Union ) from fuzz_introspector import (cfg_load, code_coverage, constants, data_loader, @@ -172,7 +173,7 @@ class AnalysisInterface(abc.ABC): json_string_result: str = "" display_html: bool = False - def set_additional_properties(self, properties: dict[str, str]): + def set_additional_properties(self, properties: dict[str, Union[str, int]]): """Allow setting additional properties for this analysis.""" self.properties = properties @@ -235,7 +236,7 @@ def set_display_html(self, is_display_html): def instantiate_analysis_interface(cls: Type[AnalysisInterface], - props: dict[str, str]): + props: dict[str, Union[str, int]]): """Wrapper function to satisfy Mypy semantics""" analysis_interface = cls() analysis_interface.set_additional_properties(props) diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py index e8f8443b6..1c6acabd2 100644 --- a/src/fuzz_introspector/cli.py +++ b/src/fuzz_introspector/cli.py @@ -1,3 +1,4 @@ +# Copyright 2024 Fuzz Introspector Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,6 +20,8 @@ from fuzz_introspector import commands, constants +from typing import Union + sys.setrecursionlimit(10000) logger = logging.getLogger(name=__name__) @@ -72,12 +75,16 @@ def get_cmdline_parser() -> argparse.ArgumentParser: FuzzDriverSynthesizerAnalysis, FuzzEngineInputAnalysis, FilePathAnalyser, ThirdPartyAPICoverageAnalyser, MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, - SinkCoverageAnalyser, FunctionSourceLineAnalyser + SinkCoverageAnalyser, SourceCodeLineAnalyser ''') - full_parser.add_argument('--properties', - nargs='*', - default=[], - help='Additional properties for analysis') + full_parser.add_argument('--source-file', + default='', + type=str, + help='Target file path or name for SourceCodeLineAnalyser') + full_parser.add_argument('--line', + default=-1, + type=int, + help='Target line for SourceCodeLineAnalyser') # Report generation command report_parser = subparsers.add_parser( @@ -106,7 +113,7 @@ def get_cmdline_parser() -> argparse.ArgumentParser: FuzzDriverSynthesizerAnalysis, FuzzEngineInputAnalysis, FilePathAnalyser, ThirdPartyAPICoverageAnalyser, MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, - SinkCoverageAnalyser, FunctionSourceLineAnalyser + SinkCoverageAnalyser, SourceCodeLineAnalyser """) report_parser.add_argument("--enable-all-analyses", action='store_true', @@ -129,10 +136,14 @@ def get_cmdline_parser() -> argparse.ArgumentParser: nargs="+", default=["FuzzEngineInputAnalysis"], help="State which analysis requires separate json report output") - report_parser.add_argument('--properties', - nargs='*', - default=[], - help='Additional properties for analysis') + report_parser.add_argument('--source-file', + default='', + type=str, + help='Target file path or name for SourceCodeLineAnalyser') + report_parser.add_argument('--line', + default=-1, + type=int, + help='Target line for SourceCodeLineAnalyser') # Command for correlating binary files to fuzzerLog files correlate_parser = subparsers.add_parser( @@ -185,11 +196,9 @@ def main() -> int: logger.info("Running fuzz introspector post-processing") if args.command == 'report': - props: dict[str, str] = {} - for property in args.properties: - if property.count('=') == 1: - key, value = property.split('=', 1) - props[key] = value + props: dict[str, Union[str, int]] = {} + props['source_file'] = args.source_file + props['line'] = args.line return_code = commands.run_analysis_on_dir(args.target_dir, args.coverage_url, diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py index b98c3f5fb..facd983ba 100644 --- a/src/fuzz_introspector/commands.py +++ b/src/fuzz_introspector/commands.py @@ -18,7 +18,7 @@ import json import yaml import shutil -from typing import List, Optional +from typing import Optional, Union from fuzz_introspector import analysis from fuzz_introspector import constants @@ -54,6 +54,9 @@ def end_to_end(args) -> int: else: out_dir = os.getcwd() + if not os.path.exists(out_dir): + os.mkdir(out_dir) + if args.language == constants.LANGUAGES.JAVA: entrypoint = 'fuzzerTestOneInput' else: @@ -69,11 +72,9 @@ def end_to_end(args) -> int: else: language = args.language - props: dict[str, str] = {} - for property in args.properties: - if property.count('=') == 1: - key, value = property.split('=', 1) - props[key] = value + props: dict[str, Union[str, int]] = {} + props['source_file'] = args.source_file + props['line'] = args.line return run_analysis_on_dir(target_folder=out_dir, coverage_url=args.coverage_url, @@ -88,16 +89,16 @@ def end_to_end(args) -> int: def run_analysis_on_dir(target_folder: str, coverage_url: str, - analyses_to_run: List[str], + analyses_to_run: list[str], correlation_file: str, enable_all_analyses: bool, report_name: str, language: str, - output_json: Optional[List[str]] = None, + output_json: Optional[list[str]] = None, parallelise: bool = True, dump_files: bool = True, out_dir: str = '', - props: dict[str, str] = {}) -> int: + props: dict[str, Union[str, int]] = {}) -> int: """Runs Fuzz Introspector analysis from based on the results from a frontend run. The primary task is to aggregate the data and generate a HTML report.""" diff --git a/src/fuzz_introspector/datatypes/function_profile.py b/src/fuzz_introspector/datatypes/function_profile.py index a882068d5..f13a77993 100644 --- a/src/fuzz_introspector/datatypes/function_profile.py +++ b/src/fuzz_introspector/datatypes/function_profile.py @@ -57,6 +57,10 @@ def __init__(self, elem: Dict[Any, Any]) -> None: elem['BranchProfiles']) self.signature = elem.get('signature', '') + # For backward compatibility on jvm project + # that the function_source_file stored the class name instead of file path. + self.function_source_file_path = elem.get('functionSourceFilePath', self.function_source_file) + # Duplication of functions_reached to keep the original sets # of call trees for further processing and analysis. This # could avoid loss of call tree information when functions_reached diff --git a/src/fuzz_introspector/frontends/frontend_jvm.py b/src/fuzz_introspector/frontends/frontend_jvm.py index bd532ef9f..cd04c1382 100644 --- a/src/fuzz_introspector/frontends/frontend_jvm.py +++ b/src/fuzz_introspector/frontends/frontend_jvm.py @@ -232,6 +232,9 @@ def __init__(self, # Store method line information if self.is_default_constructor: +# print('FUCK') +# print(self.root.text.decode()) +# print('ENDFUCK') self.start_line = -1 self.end_line = -1 self.name = '' @@ -1045,6 +1048,7 @@ def dump_module_logic(self, method_dict: dict[str, Any] = {} method_dict['functionName'] = method.name method_dict['functionSourceFile'] = method.class_interface.name + method_dict['functionSourceFilePath'] = method.parent_source.source_file method_dict['functionLinenumber'] = method.start_line method_dict['functionLinenumberEnd'] = method.end_line method_dict['linkageType'] = '' diff --git a/src/fuzz_introspector/html_report.py b/src/fuzz_introspector/html_report.py index ebef0c09f..ce29bd232 100644 --- a/src/fuzz_introspector/html_report.py +++ b/src/fuzz_introspector/html_report.py @@ -25,6 +25,7 @@ List, Optional, Tuple, + Union, ) from fuzz_introspector import (analysis, constants, html_constants, @@ -728,7 +729,7 @@ def create_html_report(introspection_proj: analysis.IntrospectionProject, report_name, dump_files, out_dir: str = '', - props: dict[str, str] = {}) -> None: + props: dict[str, Union[str, int]] = {}) -> None: """ Logs a complete report. This is the current main place for looking at data produced by fuzz introspector. From 283e8ca1277283d26207ddcd3b8e463aa0a3eaac Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 14:06:13 +0000 Subject: [PATCH 05/13] Fix formatting Signed-off-by: Arthur Chan --- .../analyses/source_code_line_analyser.py | 13 +++++++------ src/fuzz_introspector/analysis.py | 11 +++-------- src/fuzz_introspector/cli.py | 18 ++++++++++-------- .../datatypes/function_profile.py | 4 ---- .../frontends/frontend_jvm.py | 4 ---- 5 files changed, 20 insertions(+), 30 deletions(-) diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py index 43ee16cd1..45fd5b3f6 100644 --- a/src/fuzz_introspector/analyses/source_code_line_analyser.py +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -21,7 +21,8 @@ from fuzz_introspector import (analysis, html_helpers) -from fuzz_introspector.datatypes import (project_profile, fuzzer_profile) +from fuzz_introspector.datatypes import (project_profile, fuzzer_profile, + function_profile) logger = logging.getLogger(name=__name__) @@ -74,7 +75,7 @@ def analysis_func(self, logger.info(f' - Running analysis {self.get_name()}') # Get target source file and line - target_source = self.properties.get('source_file') + target_source = str(self.properties.get('source_file')) target_line = self.properties.get('line') if not target_source or not isinstance(target_line, int) or target_line <= 0: @@ -86,12 +87,12 @@ def analysis_func(self, all_functions.extend(proj_profile.all_constructors.values()) # Generate a Source File to Function Profile map and store in JSON Result - func_file_map = {} + func_file_map: dict[str, list[function_profile.FunctionProfile]] = {} for function in all_functions: - func_list = func_file_map.get(function.function_source_file_path, - []) + func_list = func_file_map.get(function.function_source_file, + []) func_list.append(function) - func_file_map[function.function_source_file_path] = func_list + func_file_map[function.function_source_file] = func_list if os.sep in target_source: # File path diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py index 10fc3011c..bf948faf5 100644 --- a/src/fuzz_introspector/analysis.py +++ b/src/fuzz_introspector/analysis.py @@ -19,13 +19,7 @@ import os import shutil -from typing import ( - Dict, - List, - Type, - Set, - Union -) +from typing import (Dict, List, Type, Set, Union) from fuzz_introspector import (cfg_load, code_coverage, constants, data_loader, debug_info, html_helpers, json_report, utils) @@ -173,7 +167,8 @@ class AnalysisInterface(abc.ABC): json_string_result: str = "" display_html: bool = False - def set_additional_properties(self, properties: dict[str, Union[str, int]]): + def set_additional_properties(self, properties: dict[str, Union[str, + int]]): """Allow setting additional properties for this analysis.""" self.properties = properties diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py index 1c6acabd2..3924a6b2f 100644 --- a/src/fuzz_introspector/cli.py +++ b/src/fuzz_introspector/cli.py @@ -77,10 +77,11 @@ def get_cmdline_parser() -> argparse.ArgumentParser: MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, SinkCoverageAnalyser, SourceCodeLineAnalyser ''') - full_parser.add_argument('--source-file', - default='', - type=str, - help='Target file path or name for SourceCodeLineAnalyser') + full_parser.add_argument( + '--source-file', + default='', + type=str, + help='Target file path or name for SourceCodeLineAnalyser') full_parser.add_argument('--line', default=-1, type=int, @@ -136,10 +137,11 @@ def get_cmdline_parser() -> argparse.ArgumentParser: nargs="+", default=["FuzzEngineInputAnalysis"], help="State which analysis requires separate json report output") - report_parser.add_argument('--source-file', - default='', - type=str, - help='Target file path or name for SourceCodeLineAnalyser') + report_parser.add_argument( + '--source-file', + default='', + type=str, + help='Target file path or name for SourceCodeLineAnalyser') report_parser.add_argument('--line', default=-1, type=int, diff --git a/src/fuzz_introspector/datatypes/function_profile.py b/src/fuzz_introspector/datatypes/function_profile.py index f13a77993..a882068d5 100644 --- a/src/fuzz_introspector/datatypes/function_profile.py +++ b/src/fuzz_introspector/datatypes/function_profile.py @@ -57,10 +57,6 @@ def __init__(self, elem: Dict[Any, Any]) -> None: elem['BranchProfiles']) self.signature = elem.get('signature', '') - # For backward compatibility on jvm project - # that the function_source_file stored the class name instead of file path. - self.function_source_file_path = elem.get('functionSourceFilePath', self.function_source_file) - # Duplication of functions_reached to keep the original sets # of call trees for further processing and analysis. This # could avoid loss of call tree information when functions_reached diff --git a/src/fuzz_introspector/frontends/frontend_jvm.py b/src/fuzz_introspector/frontends/frontend_jvm.py index cd04c1382..bd532ef9f 100644 --- a/src/fuzz_introspector/frontends/frontend_jvm.py +++ b/src/fuzz_introspector/frontends/frontend_jvm.py @@ -232,9 +232,6 @@ def __init__(self, # Store method line information if self.is_default_constructor: -# print('FUCK') -# print(self.root.text.decode()) -# print('ENDFUCK') self.start_line = -1 self.end_line = -1 self.name = '' @@ -1048,7 +1045,6 @@ def dump_module_logic(self, method_dict: dict[str, Any] = {} method_dict['functionName'] = method.name method_dict['functionSourceFile'] = method.class_interface.name - method_dict['functionSourceFilePath'] = method.parent_source.source_file method_dict['functionLinenumber'] = method.start_line method_dict['functionLinenumberEnd'] = method.end_line method_dict['linkageType'] = '' From 9cc92424993101579c5a8f696d0222799363b00f Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 14:12:10 +0000 Subject: [PATCH 06/13] Fix formatting Signed-off-by: Arthur Chan --- .../analyses/source_code_line_analyser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py index 45fd5b3f6..d115b1cdc 100644 --- a/src/fuzz_introspector/analyses/source_code_line_analyser.py +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -87,10 +87,9 @@ def analysis_func(self, all_functions.extend(proj_profile.all_constructors.values()) # Generate a Source File to Function Profile map and store in JSON Result - func_file_map: dict[str, list[function_profile.FunctionProfile]] = {} + func_file_map: dict[str, list[function_profile.FunctionProfile]] = {} for function in all_functions: - func_list = func_file_map.get(function.function_source_file, - []) + func_list = func_file_map.get(function.function_source_file, []) func_list.append(function) func_file_map[function.function_source_file] = func_list @@ -111,7 +110,8 @@ def analysis_func(self, result_list = [] for func in target_func_list: if func.function_linenumber <= target_line <= func.function_line_number_end: - logger.info(f'Found function {func.function_name} from line {target_line} in {target_source}') + logger.info(f'Found function {func.function_name} from line ' + f'{target_line} in {target_source}') result_list.append(func) if result_list: From 6cea3db1418c659cac6e9fcbc05ee9b63aa71074 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 14:24:14 +0000 Subject: [PATCH 07/13] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/analyses/source_code_line_analyser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py index d115b1cdc..0ef6b0515 100644 --- a/src/fuzz_introspector/analyses/source_code_line_analyser.py +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -78,7 +78,8 @@ def analysis_func(self, target_source = str(self.properties.get('source_file')) target_line = self.properties.get('line') - if not target_source or not isinstance(target_line, int) or target_line <= 0: + if not target_source or not isinstance(target_line, + int) or target_line <= 0: logger.error('No valid source code or target line are provided') return '' From 1d75b3cf0bced0274b20d757e63b38db74736e20 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 15:53:44 +0000 Subject: [PATCH 08/13] Fix logic to allow spearate CLI for json only analysis Signed-off-by: Arthur Chan --- src/fuzz_introspector/analyses/__init__.py | 5 ++ src/fuzz_introspector/cli.py | 69 +++++++++++-------- .../frontends/frontend_jvm.py | 3 - src/fuzz_introspector/html_report.py | 10 ++- 4 files changed, 51 insertions(+), 36 deletions(-) diff --git a/src/fuzz_introspector/analyses/__init__.py b/src/fuzz_introspector/analyses/__init__.py index 50c019075..9c4ca71ad 100644 --- a/src/fuzz_introspector/analyses/__init__.py +++ b/src/fuzz_introspector/analyses/__init__.py @@ -23,5 +23,10 @@ metadata.MetadataAnalysis, sinks_analyser.SinkCoverageAnalyser, annotated_cfg.FuzzAnnotatedCFG, +] + +# This is the list of analyses that are meant to run +# directly from CLI without the need to generate HTML reports +standalone_analyses = [ source_code_line_analyser.SourceCodeLineAnalyser, ] diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py index 3924a6b2f..f6c707055 100644 --- a/src/fuzz_introspector/cli.py +++ b/src/fuzz_introspector/cli.py @@ -75,17 +75,8 @@ def get_cmdline_parser() -> argparse.ArgumentParser: FuzzDriverSynthesizerAnalysis, FuzzEngineInputAnalysis, FilePathAnalyser, ThirdPartyAPICoverageAnalyser, MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, - SinkCoverageAnalyser, SourceCodeLineAnalyser + SinkCoverageAnalyser ''') - full_parser.add_argument( - '--source-file', - default='', - type=str, - help='Target file path or name for SourceCodeLineAnalyser') - full_parser.add_argument('--line', - default=-1, - type=int, - help='Target line for SourceCodeLineAnalyser') # Report generation command report_parser = subparsers.add_parser( @@ -114,7 +105,7 @@ def get_cmdline_parser() -> argparse.ArgumentParser: FuzzDriverSynthesizerAnalysis, FuzzEngineInputAnalysis, FilePathAnalyser, ThirdPartyAPICoverageAnalyser, MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, - SinkCoverageAnalyser, SourceCodeLineAnalyser + SinkCoverageAnalyser """) report_parser.add_argument("--enable-all-analyses", action='store_true', @@ -137,15 +128,6 @@ def get_cmdline_parser() -> argparse.ArgumentParser: nargs="+", default=["FuzzEngineInputAnalysis"], help="State which analysis requires separate json report output") - report_parser.add_argument( - '--source-file', - default='', - type=str, - help='Target file path or name for SourceCodeLineAnalyser') - report_parser.add_argument('--line', - default=-1, - type=int, - help='Target line for SourceCodeLineAnalyser') # Command for correlating binary files to fuzzerLog files correlate_parser = subparsers.add_parser( @@ -169,6 +151,43 @@ def get_cmdline_parser() -> argparse.ArgumentParser: required=True, help='Path to the second report') + # Standalone analyser + analyse_parser = subparsers.add_parser( + 'analyse', help='Standlone analyser commands to run on the target project.') + analyse_parser.add_argument('--target-dir', + type=str, + help='Directory holding source to analyse.', + required=True) + analyse_parser.add_argument('--language', + type=str, + help='Programming of the source code to analyse.', + choices=constants.LANGUAGES_SUPPORTED) + analyse_parser.add_argument('--out-dir', + default='', + type=str, + help='Folder to store analysis results.') + + analyser_parser = analyse_parser.add_subparsers(( + dest='analyser', + required=True, + help='Available analyser: SourceCodeLineAnalyser') + source_code_line_analyser_parser = analyser_parser.add_parser( + 'SourceCodeLineAnalyser', + help=('Provide information in out-dir/function.json for the function' + ' found in the given target file and line number')) + + source_code_line_analyser_parser.add_argument( + '--source-file', + default='', + type=str, + help='Target file path or name for SourceCodeLineAnalyser') + source_code_line_analyser_parser.add_argument( + '--line', + default=-1, + type=int, + help='Target line for SourceCodeLineAnalyser') + + return parser @@ -198,10 +217,6 @@ def main() -> int: logger.info("Running fuzz introspector post-processing") if args.command == 'report': - props: dict[str, Union[str, int]] = {} - props['source_file'] = args.source_file - props['line'] = args.line - return_code = commands.run_analysis_on_dir(args.target_dir, args.coverage_url, args.analyses, @@ -209,8 +224,7 @@ def main() -> int: args.enable_all_analyses, args.name, args.language, - args.output_json, - props=props) + args.output_json) logger.info("Ending fuzz introspector report generation") elif args.command == 'correlate': return_code = commands.correlate_binaries_to_logs(args.binaries_dir) @@ -220,6 +234,8 @@ def main() -> int: return_code = commands.light_analysis(args) elif args.command == 'full': return_code = commands.end_to_end(args) + elif args.command == 'analyse': + return_code = commands.analyse(args) else: return_code = constants.APP_EXIT_ERROR logger.info("Ending fuzz introspector post-processing") @@ -228,4 +244,3 @@ def main() -> int: if __name__ == "__main__": main() -"--enable-all-analyses", diff --git a/src/fuzz_introspector/frontends/frontend_jvm.py b/src/fuzz_introspector/frontends/frontend_jvm.py index bd532ef9f..38e9181a8 100644 --- a/src/fuzz_introspector/frontends/frontend_jvm.py +++ b/src/fuzz_introspector/frontends/frontend_jvm.py @@ -1170,9 +1170,6 @@ def extract_calltree(self, if not visited_functions: visited_functions = set() - if function and '].' not in function: - function = None - if not source_code and function: source_code = self.find_source_with_method(function) diff --git a/src/fuzz_introspector/html_report.py b/src/fuzz_introspector/html_report.py index ce29bd232..140ec5a4a 100644 --- a/src/fuzz_introspector/html_report.py +++ b/src/fuzz_introspector/html_report.py @@ -25,7 +25,6 @@ List, Optional, Tuple, - Union, ) from fuzz_introspector import (analysis, constants, html_constants, @@ -644,7 +643,7 @@ def create_section_all_functions(table_of_contents, tables, proj_profile, def create_section_optional_analyses( table_of_contents, analyses_to_run, output_json, tables, introspection_proj: analysis.IntrospectionProject, basefolder, - coverage_url, conclusions, dump_files, out_dir, props) -> str: + coverage_url, conclusions, dump_files, out_dir) -> str: """Creates the HTML sections containing optional analyses.""" html_report_core = "" logger.info(" - Handling optional analyses") @@ -663,7 +662,7 @@ def create_section_optional_analyses( analysis_name = analysis_interface.get_name() if analysis_name in combined_analyses: analysis_instance = analysis.instantiate_analysis_interface( - analysis_interface, props) + analysis_interface) analysis_instance.dump_files = dump_files # Set display_html flag for the analysis_instance @@ -728,8 +727,7 @@ def create_html_report(introspection_proj: analysis.IntrospectionProject, output_json, report_name, dump_files, - out_dir: str = '', - props: dict[str, Union[str, int]] = {}) -> None: + out_dir: str = '') -> None: """ Logs a complete report. This is the current main place for looking at data produced by fuzz introspector. @@ -785,7 +783,7 @@ def create_html_report(introspection_proj: analysis.IntrospectionProject, table_of_contents, analyses_to_run, output_json, tables, introspection_proj, introspection_proj.proj_profile.basefolder, introspection_proj.proj_profile.coverage_url, conclusions, dump_files, - out_dir, props) + out_dir) # Create HTML showing the conclusions at the top of the report. html_report_top += html_helpers.create_conclusions_box(conclusions) From 169768b55301b870d315a2872edbceb07ceb631b Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 19:30:47 +0000 Subject: [PATCH 09/13] Fix logic with new api in cli Signed-off-by: Arthur Chan --- .../analyses/source_code_line_analyser.py | 33 +++++--- src/fuzz_introspector/analysis.py | 12 +-- src/fuzz_introspector/cli.py | 49 ++++++------ src/fuzz_introspector/commands.py | 76 ++++++++++++++++--- 4 files changed, 117 insertions(+), 53 deletions(-) diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py index 0ef6b0515..83e1787a4 100644 --- a/src/fuzz_introspector/analyses/source_code_line_analyser.py +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -64,6 +64,11 @@ def set_json_string_result(self, json_string): """ self.json_string_result = json_string + def set_source_file_line(self, source_file: str, source_line: int): + """Configure the source file and source line for this analyser.""" + self.source_file = source_file + self.source_line = source_line + def analysis_func(self, table_of_contents: html_helpers.HtmlTableOfContents, tables: List[str], @@ -71,15 +76,10 @@ def analysis_func(self, profiles: List[fuzzer_profile.FuzzerProfile], basefolder: str, coverage_url: str, conclusions: List[html_helpers.HTMLConclusion], - out_dir) -> str: + out_dir: str) -> str: logger.info(f' - Running analysis {self.get_name()}') - # Get target source file and line - target_source = str(self.properties.get('source_file')) - target_line = self.properties.get('line') - - if not target_source or not isinstance(target_line, - int) or target_line <= 0: + if not self.source_file or self.source_line <= 0: logger.error('No valid source code or target line are provided') return '' @@ -94,14 +94,14 @@ def analysis_func(self, func_list.append(function) func_file_map[function.function_source_file] = func_list - if os.sep in target_source: + if os.sep in self.source_file: # File path - target_func_list = func_file_map.get(target_source, []) + target_func_list = func_file_map.get(self.source_file, []) else: # File name target_func_list = [] for key, value in func_file_map.items(): - if os.path.basename(key) == target_source: + if os.path.basename(key) == self.source_file: target_func_list.extend(value) if not target_func_list: @@ -110,12 +110,21 @@ def analysis_func(self, result_list = [] for func in target_func_list: - if func.function_linenumber <= target_line <= func.function_line_number_end: + start = func.function_linenumber + end = func.function_line_number_end + if start <= self.source_line <= end: logger.info(f'Found function {func.function_name} from line ' - f'{target_line} in {target_source}') + f'{self.source_line} in {self.source_file}') result_list.append(func) if result_list: self.json_results['functions'] = result_list + result_json_path = os.path.join(out_dir, 'functions.json') + logger.info(f'Dumping result to {result_json_path}') + with open(result_json_path, w) as f: + json.dump(self.json_results, f) + else: + logger.info(f'No functions found from line {self.source_line}' + f' in {self.source_file}') return '' diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py index bf948faf5..9654714e0 100644 --- a/src/fuzz_introspector/analysis.py +++ b/src/fuzz_introspector/analysis.py @@ -230,12 +230,9 @@ def set_display_html(self, is_display_html): self.display_html = is_display_html -def instantiate_analysis_interface(cls: Type[AnalysisInterface], - props: dict[str, Union[str, int]]): +def instantiate_analysis_interface(cls: Type[AnalysisInterface]): """Wrapper function to satisfy Mypy semantics""" - analysis_interface = cls() - analysis_interface.set_additional_properties(props) - return analysis_interface + return cls() class FuzzBranchBlocker: @@ -262,6 +259,11 @@ def get_all_analyses() -> List[Type[AnalysisInterface]]: return analyses.all_analyses +def get_all_standalone_analyses() -> List[Type[AnalysisInterface]]: + from fuzz_introspector import analyses + return analyses.standalone_analyses + + def callstack_get_parent(n: cfg_load.CalltreeCallsite, c: Dict[int, str]) -> str: return c[int(n.depth) - 1] diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py index f6c707055..5d7d231b9 100644 --- a/src/fuzz_introspector/cli.py +++ b/src/fuzz_introspector/cli.py @@ -153,29 +153,18 @@ def get_cmdline_parser() -> argparse.ArgumentParser: # Standalone analyser analyse_parser = subparsers.add_parser( - 'analyse', help='Standlone analyser commands to run on the target project.') - analyse_parser.add_argument('--target-dir', - type=str, - help='Directory holding source to analyse.', - required=True) - analyse_parser.add_argument('--language', - type=str, - help='Programming of the source code to analyse.', - choices=constants.LANGUAGES_SUPPORTED) - analyse_parser.add_argument('--out-dir', - default='', - type=str, - help='Folder to store analysis results.') - - analyser_parser = analyse_parser.add_subparsers(( + 'analyse', + help='Standlone analyser commands to run on the target project.') + + analyser_parser = analyse_parser.add_subparsers( dest='analyser', required=True, help='Available analyser: SourceCodeLineAnalyser') + source_code_line_analyser_parser = analyser_parser.add_parser( 'SourceCodeLineAnalyser', help=('Provide information in out-dir/function.json for the function' ' found in the given target file and line number')) - source_code_line_analyser_parser.add_argument( '--source-file', default='', @@ -186,7 +175,21 @@ def get_cmdline_parser() -> argparse.ArgumentParser: default=-1, type=int, help='Target line for SourceCodeLineAnalyser') - + source_code_line_analyser_parser.add_argument( + '--target-dir', + type=str, + help='Directory holding source to analyse.', + required=True) + source_code_line_analyser_parser.add_argument( + '--language', + type=str, + help='Programming of the source code to analyse.', + choices=constants.LANGUAGES_SUPPORTED) + source_code_line_analyser_parser.add_argument( + '--out-dir', + default='', + type=str, + help='Folder to store analysis results.') return parser @@ -217,14 +220,10 @@ def main() -> int: logger.info("Running fuzz introspector post-processing") if args.command == 'report': - return_code = commands.run_analysis_on_dir(args.target_dir, - args.coverage_url, - args.analyses, - args.correlation_file, - args.enable_all_analyses, - args.name, - args.language, - args.output_json) + return_code = commands.run_analysis_on_dir( + args.target_dir, args.coverage_url, args.analyses, + args.correlation_file, args.enable_all_analyses, args.name, + args.language, args.output_json) logger.info("Ending fuzz introspector report generation") elif args.command == 'correlate': return_code = commands.correlate_binaries_to_logs(args.binaries_dir) diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py index facd983ba..87f151054 100644 --- a/src/fuzz_introspector/commands.py +++ b/src/fuzz_introspector/commands.py @@ -18,7 +18,7 @@ import json import yaml import shutil -from typing import Optional, Union +from typing import Optional from fuzz_introspector import analysis from fuzz_introspector import constants @@ -72,10 +72,6 @@ def end_to_end(args) -> int: else: language = args.language - props: dict[str, Union[str, int]] = {} - props['source_file'] = args.source_file - props['line'] = args.line - return run_analysis_on_dir(target_folder=out_dir, coverage_url=args.coverage_url, analyses_to_run=args.analyses, @@ -83,8 +79,7 @@ def end_to_end(args) -> int: enable_all_analyses=(not args.analyses), report_name=args.name, language=language, - out_dir=out_dir, - props=props) + out_dir=out_dir) def run_analysis_on_dir(target_folder: str, @@ -97,8 +92,7 @@ def run_analysis_on_dir(target_folder: str, output_json: Optional[list[str]] = None, parallelise: bool = True, dump_files: bool = True, - out_dir: str = '', - props: dict[str, Union[str, int]] = {}) -> int: + out_dir: str = '') -> int: """Runs Fuzz Introspector analysis from based on the results from a frontend run. The primary task is to aggregate the data and generate a HTML report.""" @@ -123,8 +117,7 @@ def run_analysis_on_dir(target_folder: str, output_json, report_name, dump_files, - out_dir=out_dir, - props=props) + out_dir=out_dir) return constants.APP_EXIT_SUCCESS @@ -160,3 +153,64 @@ def light_analysis(args) -> int: f.write(json.dumps(list(all_source_files))) return 0 + + +def analyse(args) -> int: + """Perform a light analysis using the chosen Analyser and return + json results.""" + # Retrieve the correct analyser + target_analyser = None + for analyser in analysis.get_all_standalone_analyses(): + if analyser.get_name() == args.analyser: + target_analyser = analysis.instantiate_analysis_interface(analyser) + break + + # Return error if analyser not found + if not target_analyser: + logger.error(f'Analyser {args.analyser} not found.') + return constants.APP_EXIT_ERROR + + # Auto detect project language is not provided + if not args.language: + args.language = utils.detect_language(args.target_dir) + + # Prepare out directory + if args.out_dir: + out_dir = args.out_dir + else: + out_dir = os.getcwd() + + if not os.path.exists(out_dir): + os.mkdir(out_dir) + + # Fix entrypoint default for languages + if args.language == constants.LANGUAGES.JAVA: + entrypoint = 'fuzzerTestOneInput' + else: + entrypoint = 'LLVMFuzzerTestOneInput' + + # Run the frontend + oss_fuzz.analyse_folder(language=args.language, + directory=args.target_dir, + entrypoint=entrypoint, + out=out_dir) + + # Perform the FI backend project analysis from the frontend + introspection_proj = analysis.IntrospectionProject(args.language, out_dir, + '') + introspection_proj.load_data_files(True, '', out_dir) + + # Perform the chosen standalone analysis + if target_analyser.get_name() == 'SourceCodeLineAnalyser': + source_file = args.source_file + source_line = args.source_line + + target_analyser.set_source_file_line(source_file, source_line) + target_analyser.analysis_func(html_helpers.HtmlTableOfContents(), [], + introspection_proj.proj_profile, + introspection_proj.profiles, '', '', [], + out_dir) + + #TODO Add more analyser for standalone run + + return constants.APP_EXIT_SUCCESS From ebaa63d005658c11c301d4479d2640b8561bcee4 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 19:55:59 +0000 Subject: [PATCH 10/13] Fix logic and formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/analyses/__init__.py | 19 ++++++++- .../analyses/source_code_line_analyser.py | 6 +-- src/fuzz_introspector/cli.py | 2 +- src/fuzz_introspector/commands.py | 1 + .../datatypes/function_profile.py | 39 +++++++++++++++++++ 5 files changed, 61 insertions(+), 6 deletions(-) diff --git a/src/fuzz_introspector/analyses/__init__.py b/src/fuzz_introspector/analyses/__init__.py index 9c4ca71ad..b057d260a 100644 --- a/src/fuzz_introspector/analyses/__init__.py +++ b/src/fuzz_introspector/analyses/__init__.py @@ -1,3 +1,18 @@ +# Copyright 2025 Fuzz Introspector Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from fuzz_introspector import analysis from fuzz_introspector.analyses import bug_digestor from fuzz_introspector.analyses import driver_synthesizer from fuzz_introspector.analyses import engine_input @@ -12,7 +27,7 @@ # All optional analyses. # Ordering here is important as top analysis will be shown first in the report -all_analyses = [ +all_analyses: list[type[analysis.AnalysisInterface]] = [ optimal_targets.OptimalTargets, engine_input.EngineInput, runtime_coverage_analysis.RuntimeCoverageAnalysis, @@ -27,6 +42,6 @@ # This is the list of analyses that are meant to run # directly from CLI without the need to generate HTML reports -standalone_analyses = [ +standalone_analyses: list[type[analysis.AnalysisInterface]] = [ source_code_line_analyser.SourceCodeLineAnalyser, ] diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py index 83e1787a4..7f823ad18 100644 --- a/src/fuzz_introspector/analyses/source_code_line_analyser.py +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -106,7 +106,7 @@ def analysis_func(self, if not target_func_list: logger.error('Failed to locate the target source file ' - f'{target_source} from the project.') + f'{self.source_file} from the project.') result_list = [] for func in target_func_list: @@ -115,13 +115,13 @@ def analysis_func(self, if start <= self.source_line <= end: logger.info(f'Found function {func.function_name} from line ' f'{self.source_line} in {self.source_file}') - result_list.append(func) + result_list.append(func.to_dict()) if result_list: self.json_results['functions'] = result_list result_json_path = os.path.join(out_dir, 'functions.json') logger.info(f'Dumping result to {result_json_path}') - with open(result_json_path, w) as f: + with open(result_json_path, 'w') as f: json.dump(self.json_results, f) else: logger.info(f'No functions found from line {self.source_line}' diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py index 5d7d231b9..024d72e17 100644 --- a/src/fuzz_introspector/cli.py +++ b/src/fuzz_introspector/cli.py @@ -171,7 +171,7 @@ def get_cmdline_parser() -> argparse.ArgumentParser: type=str, help='Target file path or name for SourceCodeLineAnalyser') source_code_line_analyser_parser.add_argument( - '--line', + '--source-line', default=-1, type=int, help='Target line for SourceCodeLineAnalyser') diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py index 87f151054..0c05759fd 100644 --- a/src/fuzz_introspector/commands.py +++ b/src/fuzz_introspector/commands.py @@ -23,6 +23,7 @@ from fuzz_introspector import analysis from fuzz_introspector import constants from fuzz_introspector import diff_report +from fuzz_introspector import html_helpers from fuzz_introspector import html_report from fuzz_introspector import utils diff --git a/src/fuzz_introspector/datatypes/function_profile.py b/src/fuzz_introspector/datatypes/function_profile.py index a882068d5..feca0f710 100644 --- a/src/fuzz_introspector/datatypes/function_profile.py +++ b/src/fuzz_introspector/datatypes/function_profile.py @@ -98,6 +98,45 @@ def __init__(self, elem: Dict[Any, Any]) -> None: self.new_unreached_complexity: int = 0 self.total_cyclomatic_complexity: int = 0 + def to_dict(self) -> Dict[str, Any]: + return { + "function_name": self.function_name, + "raw_function_name": self.raw_function_name, + "function_source_file": self.function_source_file, + "linkage_type": self.linkage_type, + "function_linenumber": self.function_linenumber, + "function_line_number_end": self.function_line_number_end, + "return_type": self.return_type, + "arg_count": self.arg_count, + "arg_types": self.arg_types, + "arg_names": self.arg_names, + "bb_count": self.bb_count, + "i_count": self.i_count, + "edge_count": self.edge_count, + "cyclomatic_complexity": self.cyclomatic_complexity, + "functions_reached": self.functions_reached, + "function_uses": self.function_uses, + "function_depth": self.function_depth, + "constants_touched": self.constants_touched, + "branch_profiles": + {k: str(v) + for k, v in self.branch_profiles.items()}, + "signature": self.signature, + "functions_called": self.functions_called, + "is_accessible": self.is_accessible, + "is_jvm_library": self.is_jvm_library, + "is_enum": self.is_enum, + "is_static": self.is_static, + "exceptions": self.exceptions, + "need_close": self.need_close, + "callsite": self.callsite, + "hitcount": self.hitcount, + "reached_by_fuzzers": self.reached_by_fuzzers, + "incoming_references": self.incoming_references, + "new_unreached_complexity": self.new_unreached_complexity, + "total_cyclomatic_complexity": self.total_cyclomatic_complexity + } + @property def has_source_file(self) -> bool: return len(self.function_source_file.strip()) > 0 From 54a601b3e95e05927af99662aa9ec122b4688046 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 20:07:33 +0000 Subject: [PATCH 11/13] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/cli.py | 2 -- src/fuzz_introspector/commands.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py index 024d72e17..8c1c3bdf2 100644 --- a/src/fuzz_introspector/cli.py +++ b/src/fuzz_introspector/cli.py @@ -20,8 +20,6 @@ from fuzz_introspector import commands, constants -from typing import Union - sys.setrecursionlimit(10000) logger = logging.getLogger(name=__name__) diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py index 0c05759fd..3a6008c37 100644 --- a/src/fuzz_introspector/commands.py +++ b/src/fuzz_introspector/commands.py @@ -212,6 +212,6 @@ def analyse(args) -> int: introspection_proj.profiles, '', '', [], out_dir) - #TODO Add more analyser for standalone run + # TODO Add more analyser for standalone run return constants.APP_EXIT_SUCCESS From 8ed7fe7f8d346b7a6ae8d072d39600c1ef7de587 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 20:22:35 +0000 Subject: [PATCH 12/13] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/cli.py | 11 ----------- src/fuzz_introspector/commands.py | 4 ++-- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py index 8c1c3bdf2..7d2f3cdeb 100644 --- a/src/fuzz_introspector/cli.py +++ b/src/fuzz_introspector/cli.py @@ -64,17 +64,6 @@ def get_cmdline_parser() -> argparse.ArgumentParser: default='', type=str, help='Base coverage URL.') - full_parser.add_argument('--analyses', - nargs='+', - default=[], - help=''' - Analyses to run. Available options: - AnnotatedCFG, BugDigestorAnalysis, FuzzCalltreeAnalysis, - FuzzDriverSynthesizerAnalysis, FuzzEngineInputAnalysis, - FilePathAnalyser, ThirdPartyAPICoverageAnalyser, - MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, - SinkCoverageAnalyser - ''') # Report generation command report_parser = subparsers.add_parser( diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py index 3a6008c37..1f57c539b 100644 --- a/src/fuzz_introspector/commands.py +++ b/src/fuzz_introspector/commands.py @@ -75,9 +75,9 @@ def end_to_end(args) -> int: return run_analysis_on_dir(target_folder=out_dir, coverage_url=args.coverage_url, - analyses_to_run=args.analyses, + analyses_to_run=[], correlation_file='', - enable_all_analyses=(not args.analyses), + enable_all_analyses=True, report_name=args.name, language=language, out_dir=out_dir) From 32ce84502ab3933681f2bb8a7b30f8855d91d894 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 21:49:50 +0000 Subject: [PATCH 13/13] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/analyses/__init__.py | 1 + .../analyses/source_code_line_analyser.py | 31 +++++++++++-------- src/fuzz_introspector/commands.py | 2 +- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/fuzz_introspector/analyses/__init__.py b/src/fuzz_introspector/analyses/__init__.py index b057d260a..164164ccc 100644 --- a/src/fuzz_introspector/analyses/__init__.py +++ b/src/fuzz_introspector/analyses/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Initialisation of AnalysisInterface instances""" from fuzz_introspector import analysis from fuzz_introspector.analyses import bug_digestor diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py index 7f823ad18..8274d3f95 100644 --- a/src/fuzz_introspector/analyses/source_code_line_analyser.py +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Analysis plugin for introspection of the function on target line in target source file.""" +"""Analysis plugin for introspection of the function on target line in +target source file.""" import os import json @@ -28,10 +29,12 @@ class SourceCodeLineAnalyser(analysis.AnalysisInterface): + """Locate for the function in given line of given source file.""" + name: str = 'SourceCodeLineAnalyser' def __init__(self): - self.json_results: Dict[str, Any] = dict() + self.json_results: Dict[str, Any] = {} self.json_string_result = '' @classmethod @@ -54,7 +57,7 @@ def get_json_string_result(self) -> str: return self.json_string_result return json.dumps(self.json_results) - def set_json_string_result(self, json_string): + def set_json_string_result(self, string): """Store the result of this analyser as json string result for further processing in a later time. @@ -62,7 +65,7 @@ def set_json_string_result(self, json_string): processing result of the analyser for future use :type json_string: str """ - self.json_string_result = json_string + self.json_string_result = string def set_source_file_line(self, source_file: str, source_line: int): """Configure the source file and source line for this analyser.""" @@ -77,7 +80,7 @@ def analysis_func(self, basefolder: str, coverage_url: str, conclusions: List[html_helpers.HTMLConclusion], out_dir: str) -> str: - logger.info(f' - Running analysis {self.get_name()}') + logger.info(' - Running analysis %s', self.get_name()) if not self.source_file or self.source_line <= 0: logger.error('No valid source code or target line are provided') @@ -87,7 +90,7 @@ def analysis_func(self, all_functions = list(proj_profile.all_functions.values()) all_functions.extend(proj_profile.all_constructors.values()) - # Generate a Source File to Function Profile map and store in JSON Result + # Generate SourceFile to Function Profile map and store in JSON Result func_file_map: dict[str, list[function_profile.FunctionProfile]] = {} for function in all_functions: func_list = func_file_map.get(function.function_source_file, []) @@ -105,26 +108,28 @@ def analysis_func(self, target_func_list.extend(value) if not target_func_list: - logger.error('Failed to locate the target source file ' - f'{self.source_file} from the project.') + logger.error( + 'Failed to locate the target source file %s from the project.', + self.source_file) result_list = [] for func in target_func_list: start = func.function_linenumber end = func.function_line_number_end if start <= self.source_line <= end: - logger.info(f'Found function {func.function_name} from line ' - f'{self.source_line} in {self.source_file}') + logger.info('Found function %s from line %d in %s', + func.function_name, self.source_line, + self.source_file) result_list.append(func.to_dict()) if result_list: self.json_results['functions'] = result_list result_json_path = os.path.join(out_dir, 'functions.json') - logger.info(f'Dumping result to {result_json_path}') + logger.info('Dumping result to %s', result_json_path) with open(result_json_path, 'w') as f: json.dump(self.json_results, f) else: - logger.info(f'No functions found from line {self.source_line}' - f' in {self.source_file}') + logger.info('No functions found from line %d in %s', + self.source_line, self.source_file) return '' diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py index 1f57c539b..db0dc5ee8 100644 --- a/src/fuzz_introspector/commands.py +++ b/src/fuzz_introspector/commands.py @@ -168,7 +168,7 @@ def analyse(args) -> int: # Return error if analyser not found if not target_analyser: - logger.error(f'Analyser {args.analyser} not found.') + logger.error('Analyser %s not found.', args.analyser) return constants.APP_EXIT_ERROR # Auto detect project language is not provided