def run_target_local_python(self, generated_project: str, target_name: str,
                            log_path: str):
  """Runs one fuzz target of |generated_project| via OSS-Fuzz's helper script.

  Launches `python3 infra/helper.py run_fuzzer <project> <target> -- <args>`
  from the OSS-Fuzz checkout, where <args> comes from
  `self._libfuzzer_args()`, and sends the combined stdout/stderr of that
  command to |log_path|.

  Args:
    generated_project: Name of the OSS-Fuzz project that holds the fuzzer.
    target_name: Name of the fuzz target to run.
    log_path: File that receives the command output; it is overwritten.

  Returns:
    None. The outcome is printed and the fuzzer output is left in |log_path|
    for the caller to parse.
  """
  print(f'Running {target_name}')
  command = [
      'python3', 'infra/helper.py', 'run_fuzzer', generated_project,
      target_name, '--'
  ] + self._libfuzzer_args()

  with open(log_path, 'w') as f:
    proc = sp.Popen(command,
                    stdin=sp.DEVNULL,
                    stdout=f,
                    stderr=sp.STDOUT,
                    cwd=oss_fuzz_checkout.OSS_FUZZ_DIR)

    try:
      # Allow a few seconds on top of the configured run time before giving up.
      proc.wait(timeout=self.run_timeout + 5)
    except sp.TimeoutExpired:
      print(f'{generated_project} timed out during fuzzing.')
      # `wait(timeout=...)` does not stop the child. Kill and reap it so it
      # neither lingers nor keeps writing to the log while the log is parsed.
      # NOTE(review): this only stops the helper process itself; confirm
      # whether anything it started needs separate cleanup.
      proc.kill()
      proc.wait()
      # Try continuing and parsing the logs even in case of timeout.

  if proc.returncode != 0:
    print(f'********** Failed to run {generated_project}. **********')
  else:
    print(f'Successfully run {generated_project}.')
""" # Holding the temp directory in a global object to ensure it won't be deleted @@ -55,7 +55,8 @@ def _set_temp_oss_fuzz_repo(): GLOBAL_TEMP_DIR = tempfile.mkdtemp() global OSS_FUZZ_DIR OSS_FUZZ_DIR = GLOBAL_TEMP_DIR - atexit.register(_remove_temp_oss_fuzz_repo) + if delete_at_exit: + atexit.register(_remove_temp_oss_fuzz_repo) _clone_oss_fuzz_repo() @@ -75,10 +76,10 @@ def _clone_oss_fuzz_repo(): print(stderr) -def clone_oss_fuzz(temp_repo: bool = True): +def clone_oss_fuzz(temp_repo: bool = True, delete_at_exit: bool = True): """Clones the OSS-Fuzz repository.""" if temp_repo: - _set_temp_oss_fuzz_repo() + _set_temp_oss_fuzz_repo(delete_at_exit) if not os.path.exists(OSS_FUZZ_DIR): _clone_oss_fuzz_repo() # Remove existing targets. diff --git a/python_fuzzgen/README.md b/python_fuzzgen/README.md new file mode 100644 index 0000000000..957859f204 --- /dev/null +++ b/python_fuzzgen/README.md @@ -0,0 +1,9 @@ +# Python auto-gen + +Logic for auto-generating python fuzzers. + +Sample: + +```sh +python3 -m python_fuzzgen.build -r https://github.com/html5lib/html5lib-python -l mylog1.txt -m 50 +``` diff --git a/python_fuzzgen/__init__.py b/python_fuzzgen/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python_fuzzgen/build.py b/python_fuzzgen/build.py new file mode 100644 index 0000000000..519d508b40 --- /dev/null +++ b/python_fuzzgen/build.py @@ -0,0 +1,303 @@ +#!/usr/bin/env python3 +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def prepare_oss_fuzz_pre_analysis_project(github_project: str,
                                          autogen_project: str):
  """Creates the OSS-Fuzz project used to run Fuzz Introspector statically.

  Writes `Dockerfile`, `build.sh`, `project.yaml` and a placeholder fuzzer
  `fuzz_1.py` from the `oss_fuzz_templates` templates into
  `<OSS_FUZZ_DIR>/projects/<autogen_project>`, overwriting files that are
  already there.

  Args:
    github_project: Repository to analyse; substituted for the `TARGET_REPO`
      placeholder in the Dockerfile and project.yaml templates.
    autogen_project: Name of the OSS-Fuzz project directory to populate.

  Returns:
    |autogen_project|, unchanged.
  """
  # File name inside the project directory -> content to write.
  project_files = {
      'Dockerfile':
          oss_fuzz_templates.DOCKERFILE_PYTHON_INTROSPECTOR.replace(
              "TARGET_REPO", github_project),
      'build.sh':
          oss_fuzz_templates.BUILD_PYTHON_INTROSPECTOR,
      'project.yaml':
          oss_fuzz_templates.PROJECT_YAML_PYTHON_INTROSPETOR.replace(
              "TARGET_REPO", github_project),
      'fuzz_1.py':
          oss_fuzz_templates.FUZZ_TEMPLATE_PYTHON,
  }

  project_dir = os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, "projects",
                             autogen_project)
  print("Creating project: %s" % (project_dir))
  # Create-if-missing in one step instead of an isdir() check followed by
  # mkdir(), which can race with another process creating the directory.
  os.makedirs(project_dir, exist_ok=True)

  for file_name, content in project_files.items():
    with open(os.path.join(project_dir, file_name), 'w') as f:
      f.write(content)

  return autogen_project
def _strip_code_markup(response_text: str) -> str:
  """Removes the <code> tags and Markdown fences wrapped around a harness."""
  # "```python" must be removed before the bare "```", which is a prefix of
  # it; otherwise a stray "python" would be left in the source.
  for marker in ("<code>", "</code>", "```python", "```"):
    response_text = response_text.replace(marker, "")
  return response_text


def create_sample_harness(github_repo: str, func_elem):
  """Asks the chat model for an Atheris harness targeting one function.

  Args:
    github_repo: URL of the target repository. Its last path component is
      used as the project name in the prompt and, with "-" replaced by "_",
      as the suffix of the requested `fuzz_<name>` harness function.
    func_elem: Mapping describing the target function; only its
      'functionName' entry is read.

  Returns:
    The text of the model's first choice with <code>/</code> tags and
    Markdown code fences removed.
  """
  # Ignore a trailing "/" so the last path component is never empty.
  project_name = github_repo.rstrip("/").split("/")[-1]
  target_function = func_elem['functionName']

  # The prompt names the wrapping tag explicitly so that the cleanup in
  # _strip_code_markup() has something concrete to remove.
  # NOTE(review): the last placeholder is introduced as the "function
  # signature" but only the function name is passed -- confirm whether
  # func_elem carries a fuller signature that should be used instead.
  prompt_template = """Hi, I'm looking for your help to write a Python fuzzing harness for the %s Python project. The project is located at %s and I would like you to write a harness targeting this module. You should use the Python Atheris framework for writing the fuzzer. Could you please show me the source code for this harness?

  The specific function you should target is %s and please wrap all code in <code> tags.

  I only want the actual harness function that passes the fuzzer's data into the target function and not a whole Python module. This function should be called "fuzz_%s" and you should only show this code. Please do not show any other code.

  The harness should handle any exceptions and must include the code:
```
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput)
  atheris.Fuzz()
```
  There should be no call at all to `with atheris.instrumented_function()` and the harness function should not involve calls to functions the atheris module.

  Finally, could you make sure that the following is used to seed the fuzz data?
  `atheris.FuzzedDataProvider(data)` and `fdp.ConsumeUnicodeNoSurrogates(1024)`.

  The function signature for the target is %s and please wrap all code in <code> tags.""" % (
      project_name, github_repo, target_function,
      project_name.replace("-", "_"), target_function)

  completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
                                            messages=[
                                                {
                                                    "role": "system",
                                                    "content": prompt_template
                                                },
                                            ])
  return _strip_code_markup(completion.choices[0].message.content)
def generate_python_fuzzers(github_project: str,
                            introspector_funcs: List[Any],
                            max_targets: int,
                            *,
                            harness_generator=None) -> List[str]:
  """Generates fuzz harness sources for functions reported by introspector.

  Args:
    github_project: Repository the harnesses are written for; passed through
      to the harness generator.
    introspector_funcs: Function entries to target, used in the given order.
    max_targets: Upper bound on the number of harnesses to generate. Values
      of zero or less yield no harnesses.
    harness_generator: Optional callable taking `(github_project, func)` and
      returning the harness source. Defaults to `create_sample_harness`.

  Returns:
    The generated harness sources, one per targeted function, holding at most
    |max_targets| entries.
  """
  if harness_generator is None:
    harness_generator = create_sample_harness

  fuzzer_sources = []
  for func in introspector_funcs:
    # Check the limit before generating: exactly |max_targets| harnesses are
    # produced and no generator call is spent on one that would be dropped.
    if len(fuzzer_sources) >= max_targets:
      break
    fuzzer_sources.append(harness_generator(github_project, func))
  return fuzzer_sources
python3.9 python3.9-dev && \\ + ln --force -s /usr/bin/python3.9 /usr/local/bin/python3 && \\ + apt-get install -y python3-pip && \\ + python3 -m pip install cython virtualenv +RUN python3 -m pip install --upgrade pip setuptools meson ninja numpy pybind11 cython pythran setuptools_scm +RUN git clone https://github.com/ossf/fuzz-introspector $SRC/fuzz-introspector && \\ + cd fuzz-introspector && \\ + git submodule init && \\ + git submodule update && \\ + python3 -m pip install -r ./requirements.txt && \\ + python3 -m pip install frontends/python/PyCG + +#RUN python3 -m pip install -r /fuzz-introspector/requirements.txt +#RUN python3 -m pip install /fuzz-introspector/frontends/python/PyCG + +RUN git clone TARGET_REPO $SRC/proj +COPY *.sh *.py $SRC/ +WORKDIR $SRC/fuzz-introspector/frontends/python +""" + + +BUILD_PYTHON_INTROSPECTOR="""python3 main.py --fuzzer $SRC/fuzz_1.py --package=$SRC/proj +cp ./fuzzerLogFile-fuzz_1.data.yaml $OUT/ +""" + +BUILD_PYTHON_HARNESSES="""cd $SRC/proj + +python3 -m pip install . + +mkdir -p $SRC/fuzzer-builds +cd $SRC/fuzzer-builds +cp $SRC/fuzz_*.py . + +for fuzzer in $(find . -name 'fuzz_*.py'); do + # Compile the fuzzer but do not care if the build fails. + compile_python_fuzzer $fuzzer || true +done +""" + + +PROJECT_YAML_PYTHON_INTROSPETOR="""homepage: https://google.com +main_repo: TARGET_REPO +language: python +fuzzing_engines: +- libfuzzer +sanitizers: +- address +- undefined +primary_contacts: oss-fuzz-gen@google.com""" + + +FUZZ_TEMPLATE_PYTHON="""import sys +import atheris + + +@atheris.instrument_func +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() +"""