Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(imports): handle errors after processing AST #619

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 29 additions & 26 deletions src/imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,40 @@ pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyRe
.collect();

// Process each file in parallel and collect results
let results: PyResult<Vec<_>> = rust_file_paths
let results: Vec<_> = rust_file_paths
.par_iter()
.map(|path_str| _get_imports_from_py_file(path_str))
.map(|path_str| match _get_imports_from_py_file(path_str) {
Ok(result) => (path_str, Ok(result)),
Err(e) => (path_str, Err(e)),
})
.collect();

let results = results?;

// Merge results from each thread
let mut all_imports = HashMap::new();
for file_result in results {
for (module, locations) in file_result {
all_imports
.entry(module)
.or_insert_with(Vec::new)
.extend(locations);
let mut errors = Vec::new();

for (path, file_result) in results {
match file_result {
Ok(file_result) => {
for (module, locations) in file_result {
all_imports
.entry(module)
.or_insert_with(Vec::new)
.extend(locations);
}
}
Err(e) => errors.push((path.to_string(), e)),
}
}

for (path, error) in errors {
log::warn!(
"Warning: Skipping processing of {} because of the following error: \"{}\".",
path,
error
);
}

convert_to_python_dict(py, all_imports)
}

Expand All @@ -60,32 +76,19 @@ pub fn get_imports_from_py_file(py: Python, file_path: &PyString) -> PyResult<Py
/// Core helper function that extracts import statements and their locations from the content of a single Python file.
/// Used internally by both parallel and single file processing functions.
fn _get_imports_from_py_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
let file_content = match read_file(path_str) {
Ok(content) => content,
Err(_) => {
log::warn!("Warning: File {} could not be read. Skipping...", path_str);
return Ok(HashMap::new());
}
};
let file_content = read_file(path_str)?;

let ast = get_ast_from_file_content(&file_content, path_str)
.map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
let ast = parse(&file_content, Mode::Module, path_str)
.map_err(|e| PySyntaxError::new_err(e.to_string()))?;

let imported_modules = extract_imports_from_ast(ast);

Ok(convert_imports_with_textranges_to_location_objects(
imported_modules,
path_str,
&file_content,
))
}

/// Parses the content of a Python file into an abstract syntax tree (AST).
pub fn get_ast_from_file_content(file_content: &str, file_path: &str) -> PyResult<Mod> {
parse(file_content, Mode::Module, file_path)
.map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", file_path, e)))
}

/// Iterates through an AST to identify and collect import statements, and returns them together with their
/// respective TextRange for each occurrence.
fn extract_imports_from_ast(ast: Mod) -> HashMap<String, Vec<TextRange>> {
Expand Down
34 changes: 24 additions & 10 deletions tests/unit/imports/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,19 +122,33 @@ def test_import_parser_file_encodings_ipynb(code_cell_content: list[str], encodi
assert get_imported_modules_from_list_of_files([random_file]) == {"foo": [Location(random_file, 1, 0)]}


def test_import_parser_file_encodings_warning(tmp_path: Path, caplog: LogCaptureFixture) -> None:
file_path = Path("file1.py")
def test_import_parser_errors(tmp_path: Path, caplog: LogCaptureFixture) -> None:
file_ok = Path("file_ok.py")
file_with_bad_encoding = Path("file_with_bad_encoding.py")
file_with_syntax_error = Path("file_with_syntax_error.py")

with run_within_dir(tmp_path):
# The characters below are represented differently in ISO-8859-1 and UTF-8, so this should raise an error.
with file_path.open("w", encoding="ISO-8859-1") as f:
with file_ok.open("w") as f:
f.write("import black")

with file_with_bad_encoding.open("w", encoding="ISO-8859-1") as f:
f.write("# -*- coding: utf-8 -*-\nprint('ÆØÅ')")

with caplog.at_level(logging.WARNING):
assert get_imported_modules_from_list_of_files([file_path]) == {}
with file_with_syntax_error.open("w") as f:
f.write("invalid_syntax:::")

# //TODO logging from Rust still includes its own warning and file + line number. Can we get rid of that?
pattern = re.compile(
r"WARNING deptry.imports:imports.rs:\d+ Warning: File file1.py could not be read. Skipping...\n"
with caplog.at_level(logging.WARNING):
assert get_imported_modules_from_list_of_files([
file_ok,
file_with_bad_encoding,
file_with_syntax_error,
]) == {"black": [Location(file=Path("file_ok.py"), line=1, column=8)]}

assert re.search(
r"WARNING deptry.imports:imports.rs:\d+ Warning: Skipping processing of file_with_bad_encoding.py because of the following error: \"OSError: Failed to decode file content with the detected encoding.\".",
caplog.text,
)
assert re.search(
r"WARNING deptry.imports:imports.rs:\d+ Warning: Skipping processing of file_with_syntax_error.py because of the following error: \"SyntaxError: invalid syntax. Got unexpected token ':' at byte offset 15\".",
caplog.text,
)
assert pattern.search(caplog.text) is not None