Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Frontend] Generalise source code discovery #1991

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 12 additions & 38 deletions src/fuzz_introspector/frontends/frontend_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
"""Fuzz Introspector Light frontend"""

import os
import pathlib

import logging

Expand Down Expand Up @@ -643,38 +642,23 @@ def get_linenumber(self, bytepos):
return -1


def capture_source_files_in_tree(directory_tree, language):
"""Captures source code files in a given directory."""
language_extensions = {'c': ['.c', '.h']}
language_files = []
paths_to_avoid = [
'/src/aflplusplus', '/src/honggfuzz', '/src/libfuzzer', '/src/fuzztest'
]
def load_treesitter_trees(source_files: list[str],
is_log: bool = True) -> list[SourceCodeFile]:
"""Creates treesitter trees for all files in a given list of source files."""
results = []

for dirpath, _dirnames, filenames in os.walk(directory_tree):
if any([x for x in paths_to_avoid if dirpath.startswith(x)]):
for code_file in source_files:
if not os.path.isfile(code_file):
continue
for filename in filenames:
for extensions in language_extensions[language]:
if pathlib.Path(filename).suffix in extensions:
language_files.append(os.path.join(dirpath, filename))
return language_files

source_cls = SourceCodeFile(code_file, 'c')

def load_treesitter_trees(source_files, log_harnesses=True):
"""Creates treesitter trees for all files in a given list of source files."""
results = []
if is_log:
if source_cls.has_libfuzzer_harness():
logger.info('harness: %s', code_file)

results.append(source_cls)

for language in source_files:
if language == 'c':
for code_file in source_files[language]:
if not os.path.isfile(code_file):
continue
source_cls = SourceCodeFile(code_file, language)
if log_harnesses:
if source_cls.has_libfuzzer_harness():
logger.info('harness: %s', code_file)
results.append(source_cls)
return results


Expand All @@ -684,13 +668,3 @@ def analyse_source_code(source_content: str) -> SourceCodeFile:
language='c',
source_content=source_content.encode())
return source_code


def analyse_folder(folder_path: str, language: str = 'c') -> Project:
    """Builds a Project from the source code found under a folder.

    Args:
        folder_path: Directory that is searched for source files.
        language: Language key used both for source discovery and as the key
            of the mapping handed to `load_treesitter_trees`.

    Returns:
        A `Project` wrapping the loaded source code files.
    """
    discovered_files = capture_source_files_in_tree(folder_path, language)
    # The loader expects a mapping from language name to its file list.
    parsed_sources = load_treesitter_trees({language: discovered_files})
    return Project(parsed_sources)
25 changes: 0 additions & 25 deletions src/fuzz_introspector/frontends/frontend_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from typing import Any, Optional, Set, List

import os
import pathlib
import logging

from tree_sitter import Language, Parser, Node
Expand Down Expand Up @@ -898,30 +897,6 @@ def _recursive_function_depth(function: FunctionDefinition) -> int:
return func_depth


def capture_source_files_in_tree(directory_tree):
    """Captures C/C++ source code files in a given directory.

    Walks `directory_tree` recursively and collects every file whose
    extension, compared case-insensitively, is a known C/C++ source or header
    extension. A directory below `directory_tree` whose name contains one of
    the excluded names is skipped together with its entire subtree.

    Only directories *below* the root are tested against the exclusion list,
    so a root such as `/home/build/project` is still searched.

    Args:
        directory_tree: Root directory to search.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
    """
    language_extensions = (
        '.c', '.cpp', '.cc', '.c++', '.cxx', '.h', '.hpp', '.hh', '.hxx',
        '.inl'
    )
    exclude_directories = (
        'build', 'target', 'node_modules', 'aflplusplus', 'honggfuzz',
        'inspector', 'libfuzzer', 'fuzztest'
    )

    language_files = []
    for dirpath, dirnames, filenames in os.walk(directory_tree):
        # Prune non-project directories in place so os.walk never descends
        # into them. Matching on the directory name (not the full path) keeps
        # the location of the root itself from disabling the whole search.
        dirnames[:] = [
            name for name in dirnames
            if not any(exclude in name for exclude in exclude_directories)
        ]

        for filename in filenames:
            if pathlib.Path(filename).suffix.lower() in language_extensions:
                language_files.append(os.path.join(dirpath, filename))

    return language_files


def load_treesitter_trees(source_files, is_log=True):
"""Creates treesitter trees for all files in a given list of source files."""
results = []
Expand Down
13 changes: 0 additions & 13 deletions src/fuzz_introspector/frontends/frontend_go.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

from typing import Optional

import os
import pathlib
import logging

from tree_sitter import Language, Parser, Node
Expand Down Expand Up @@ -786,17 +784,6 @@ def extract_callsites(self, all_funcs_meths: dict[str, 'FunctionMethod']):
self.detailed_callsites.append({'Src': src_loc, 'Dst': dst})


def capture_source_files_in_tree(directory_tree: str) -> list[str]:
    """Collects the Go source files found anywhere under a directory.

    Args:
        directory_tree: Root directory that is walked recursively.

    Returns:
        Paths of all files whose suffix is exactly `.go` or `.cgo`, each
        joined onto the directory it was found in.
    """
    go_suffixes = ('.go', '.cgo')
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory_tree)
        for name in names
        if pathlib.Path(name).suffix in go_suffixes
    ]


def load_treesitter_trees(source_files: list[str],
is_log: bool = True) -> list[SourceCodeFile]:
"""Creates treesitter trees for all files in a given list of source files."""
Expand Down
21 changes: 0 additions & 21 deletions src/fuzz_introspector/frontends/frontend_jvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

from typing import Optional

import os
import pathlib
import logging

from tree_sitter import Language, Parser, Node
Expand Down Expand Up @@ -1278,25 +1276,6 @@ def get_reachable_methods(
return visited_methods


def capture_source_files_in_tree(directory_tree: str) -> list[str]:
    """Captures Java source code files in a given directory.

    Walks `directory_tree` recursively and collects every file with the
    `.java` suffix. A directory below `directory_tree` whose name contains
    one of the excluded names is skipped together with its entire subtree.

    Only directories *below* the root are tested against the exclusion list,
    so a root such as `/work/target_project` is still searched.

    Args:
        directory_tree: Root directory to search.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
    """
    exclude_directories = (
        'target', 'node_modules', 'aflplusplus', 'honggfuzz', 'inspector',
        'libfuzzer'
    )
    language_extensions = ('.java', )

    language_files = []
    for dirpath, dirnames, filenames in os.walk(directory_tree):
        # Prune non-project directories in place so os.walk never descends
        # into them. Matching on the directory name (not the full path) keeps
        # the location of the root itself from disabling the whole search.
        dirnames[:] = [
            name for name in dirnames
            if not any(exclude in name for exclude in exclude_directories)
        ]

        for filename in filenames:
            if pathlib.Path(filename).suffix in language_extensions:
                language_files.append(os.path.join(dirpath, filename))
    return language_files


def load_treesitter_trees(source_files: list[str],
entrypoint: str,
is_log: bool = True) -> list[SourceCodeFile]:
Expand Down
21 changes: 0 additions & 21 deletions src/fuzz_introspector/frontends/frontend_rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

from typing import Any, Optional

import os
import pathlib
import logging

from tree_sitter import Language, Parser, Node
Expand Down Expand Up @@ -849,25 +847,6 @@ def get_reachable_functions(
return visited_funcs


def capture_source_files_in_tree(directory_tree: str) -> list[str]:
    """Captures Rust source code files in a given directory.

    Walks `directory_tree` recursively and collects every file with the
    `.rs` suffix. A directory below `directory_tree` whose name contains one
    of the excluded names is skipped together with its entire subtree.

    Only directories *below* the root are tested against the exclusion list,
    so a root such as `/work/examples/my_crate` is still searched.

    Args:
        directory_tree: Root directory to search.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
    """
    exclude_directories = (
        'tests', 'examples', 'benches', 'node_modules', 'aflplusplus',
        'honggfuzz', 'inspector', 'libfuzzer'
    )
    language_extensions = ('.rs', )

    language_files = []
    for dirpath, dirnames, filenames in os.walk(directory_tree):
        # Prune non-project directories in place so os.walk never descends
        # into them. Matching on the directory name (not the full path) keeps
        # the location of the root itself from disabling the whole search.
        dirnames[:] = [
            name for name in dirnames
            if not any(exclude in name for exclude in exclude_directories)
        ]

        for filename in filenames:
            if pathlib.Path(filename).suffix in language_extensions:
                language_files.append(os.path.join(dirpath, filename))
    return language_files


def load_treesitter_trees(source_files: list[str],
is_log: bool = True) -> list[SourceCodeFile]:
"""Creates treesitter trees for all files in a given list of source files."""
Expand Down
Loading
Loading