Skip to content

Commit

Permalink
[Frontend] Generalise source code discovery
Browse files Browse the repository at this point in the history
Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
  • Loading branch information
arthurscchan committed Jan 17, 2025
1 parent 82eb64a commit 0654993
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 155 deletions.
50 changes: 12 additions & 38 deletions src/fuzz_introspector/frontends/frontend_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
"""Fuzz Introspector Light frontend"""

import os
import pathlib

import logging

Expand Down Expand Up @@ -643,38 +642,23 @@ def get_linenumber(self, bytepos):
return -1


def capture_source_files_in_tree(directory_tree, language):
"""Captures source code files in a given directory."""
language_extensions = {'c': ['.c', '.h']}
language_files = []
paths_to_avoid = [
'/src/aflplusplus', '/src/honggfuzz', '/src/libfuzzer', '/src/fuzztest'
]
def load_treesitter_trees(source_files: list[str],
is_log: bool = True) -> list[SourceCodeFile]:
"""Creates treesitter trees for all files in a given list of source files."""
results = []

for dirpath, _dirnames, filenames in os.walk(directory_tree):
if any([x for x in paths_to_avoid if dirpath.startswith(x)]):
for code_file in source_files:
if not os.path.isfile(code_file):
continue
for filename in filenames:
for extensions in language_extensions[language]:
if pathlib.Path(filename).suffix in extensions:
language_files.append(os.path.join(dirpath, filename))
return language_files

source_cls = SourceCodeFile(code_file, 'c')

def load_treesitter_trees(source_files, log_harnesses=True):
"""Creates treesitter trees for all files in a given list of source files."""
results = []
if is_log:
if source_cls.has_libfuzzer_harness():
logger.info('harness: %s', code_file)

results.append(source_cls)

for language in source_files:
if language == 'c':
for code_file in source_files[language]:
if not os.path.isfile(code_file):
continue
source_cls = SourceCodeFile(code_file, language)
if log_harnesses:
if source_cls.has_libfuzzer_harness():
logger.info('harness: %s', code_file)
results.append(source_cls)
return results


Expand All @@ -684,13 +668,3 @@ def analyse_source_code(source_content: str) -> SourceCodeFile:
language='c',
source_content=source_content.encode())
return source_code


def analyse_folder(folder_path: str, language: str = 'c') -> Project:
    """Builds a Project from the source files found beneath a folder.

    The files for `language` are gathered with
    capture_source_files_in_tree, stored in a dictionary keyed by the
    language, turned into source code objects by load_treesitter_trees and
    finally wrapped in a Project.
    """
    files_by_language = {
        language: capture_source_files_in_tree(folder_path, language)
    }
    return Project(load_treesitter_trees(files_by_language))
25 changes: 0 additions & 25 deletions src/fuzz_introspector/frontends/frontend_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from typing import Any, Optional, Set, List

import os
import pathlib
import logging

from tree_sitter import Language, Parser, Node
Expand Down Expand Up @@ -898,30 +897,6 @@ def _recursive_function_depth(function: FunctionDefinition) -> int:
return func_depth


def capture_source_files_in_tree(directory_tree: str) -> list[str]:
    """Captures source code files in a given directory.

    Walks `directory_tree` and collects every file whose suffix, compared
    case-insensitively, is one of the known C/C++ source or header
    extensions. A directory whose path contains one of the excluded names is
    skipped together with everything beneath it.

    Args:
        directory_tree: Root directory to search.

    Returns:
        The matching files, each as the walked directory path joined with
        the file name, in the order os.walk visits them.
    """
    language_extensions = [
        '.c', '.cpp', '.cc', '.c++', '.cxx', '.h', '.hpp', '.hh', '.hxx',
        '.inl'
    ]
    exclude_directories = [
        'build', 'target', 'node_modules', 'aflplusplus', 'honggfuzz',
        'inspector', 'libfuzzer', 'fuzztest'
    ]
    language_files = []

    for dirpath, dirnames, filenames in os.walk(directory_tree):
        # Skip some non project directories.
        # NOTE(review): this is a substring test on the whole path, so a
        # root such as '/home/builder/proj' matches 'build' and yields
        # nothing - confirm this is intended before tightening it.
        if any(exclude in dirpath for exclude in exclude_directories):
            # Every path below this one contains the same excluded name and
            # would be rejected as well, so stop os.walk from descending.
            dirnames[:] = []
            continue

        language_files.extend(
            os.path.join(dirpath, filename) for filename in filenames
            if pathlib.Path(filename).suffix.lower() in language_extensions)

    return language_files


def load_treesitter_trees(source_files, is_log=True):
"""Creates treesitter trees for all files in a given list of source files."""
results = []
Expand Down
12 changes: 0 additions & 12 deletions src/fuzz_introspector/frontends/frontend_go.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from typing import Optional

import os
import pathlib
import logging

from tree_sitter import Language, Parser, Node
Expand Down Expand Up @@ -786,17 +785,6 @@ def extract_callsites(self, all_funcs_meths: dict[str, 'FunctionMethod']):
self.detailed_callsites.append({'Src': src_loc, 'Dst': dst})


def capture_source_files_in_tree(directory_tree: str) -> list[str]:
    """Collects the Go source files located beneath a directory.

    Every directory under `directory_tree` is visited and each file whose
    suffix is exactly '.go' or '.cgo' is returned as the walked directory
    path joined with the file name.
    """
    go_suffixes = ['.go', '.cgo']
    return [
        os.path.join(root, name)
        for root, _subdirs, names in os.walk(directory_tree)
        for name in names
        if pathlib.Path(name).suffix in go_suffixes
    ]


def load_treesitter_trees(source_files: list[str],
is_log: bool = True) -> list[SourceCodeFile]:
"""Creates treesitter trees for all files in a given list of source files."""
Expand Down
20 changes: 0 additions & 20 deletions src/fuzz_introspector/frontends/frontend_jvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from typing import Optional

import os
import pathlib
import logging

from tree_sitter import Language, Parser, Node
Expand Down Expand Up @@ -1278,25 +1277,6 @@ def get_reachable_methods(
return visited_methods


def capture_source_files_in_tree(directory_tree: str) -> list[str]:
    """Captures source code files in a given directory.

    Walks `directory_tree` and collects every file whose suffix is exactly
    '.java'. A directory whose path contains one of the excluded names is
    skipped together with everything beneath it.

    Args:
        directory_tree: Root directory to search.

    Returns:
        The matching files, each as the walked directory path joined with
        the file name, in the order os.walk visits them.
    """
    exclude_directories = [
        'target', 'node_modules', 'aflplusplus', 'honggfuzz', 'inspector',
        'libfuzzer'
    ]
    language_extensions = ['.java']
    language_files = []
    for dirpath, dirnames, filenames in os.walk(directory_tree):
        # Skip some non project directories.
        # NOTE(review): this is a substring test on the whole path, so a
        # root path that itself contains e.g. 'target' yields nothing -
        # confirm this is intended before tightening it.
        if any(exclude in dirpath for exclude in exclude_directories):
            # Every path below this one contains the same excluded name and
            # would be rejected as well, so stop os.walk from descending.
            dirnames[:] = []
            continue

        language_files.extend(
            os.path.join(dirpath, filename) for filename in filenames
            if pathlib.Path(filename).suffix in language_extensions)
    return language_files


def load_treesitter_trees(source_files: list[str],
entrypoint: str,
is_log: bool = True) -> list[SourceCodeFile]:
Expand Down
20 changes: 0 additions & 20 deletions src/fuzz_introspector/frontends/frontend_rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from typing import Any, Optional

import os
import pathlib
import logging

from tree_sitter import Language, Parser, Node
Expand Down Expand Up @@ -849,25 +848,6 @@ def get_reachable_functions(
return visited_funcs


def capture_source_files_in_tree(directory_tree: str) -> list[str]:
    """Captures source code files in a given directory.

    Walks `directory_tree` and collects every file whose suffix is exactly
    '.rs'. A directory whose path contains one of the excluded names is
    skipped together with everything beneath it.

    Args:
        directory_tree: Root directory to search.

    Returns:
        The matching files, each as the walked directory path joined with
        the file name, in the order os.walk visits them.
    """
    exclude_directories = [
        'tests', 'examples', 'benches', 'node_modules', 'aflplusplus',
        'honggfuzz', 'inspector', 'libfuzzer'
    ]
    language_extensions = ['.rs']
    language_files = []
    for dirpath, dirnames, filenames in os.walk(directory_tree):
        # Skip some non project directories.
        # NOTE(review): this is a substring test on the whole path, so a
        # root path that itself contains e.g. 'tests' yields nothing -
        # confirm this is intended before tightening it.
        if any(exclude in dirpath for exclude in exclude_directories):
            # Every path below this one contains the same excluded name and
            # would be rejected as well, so stop os.walk from descending.
            dirnames[:] = []
            continue

        language_files.extend(
            os.path.join(dirpath, filename) for filename in filenames
            if pathlib.Path(filename).suffix in language_extensions)
    return language_files


def load_treesitter_trees(source_files: list[str],
is_log: bool = True) -> list[SourceCodeFile]:
"""Creates treesitter trees for all files in a given list of source files."""
Expand Down
Loading

0 comments on commit 0654993

Please sign in to comment.