From 0287201b74325087db0f6e2e9c45203a80583757 Mon Sep 17 00:00:00 2001 From: Mathieu Kniewallner Date: Mon, 18 Mar 2024 13:44:47 +0100 Subject: [PATCH] fix(imports): handle errors after processing AST --- src/imports.rs | 55 ++++++++++++++++-------------- tests/unit/imports/test_extract.py | 34 ++++++++++++------ 2 files changed, 53 insertions(+), 36 deletions(-) diff --git a/src/imports.rs b/src/imports.rs index 25db77c7..34404dd4 100644 --- a/src/imports.rs +++ b/src/imports.rs @@ -26,24 +26,40 @@ pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyRe .collect(); // Process each file in parallel and collect results - let results: PyResult> = rust_file_paths + let results: Vec<_> = rust_file_paths .par_iter() - .map(|path_str| _get_imports_from_py_file(path_str)) + .map(|path_str| match _get_imports_from_py_file(path_str) { + Ok(result) => (path_str, Ok(result)), + Err(e) => (path_str, Err(e)), + }) .collect(); - let results = results?; - // Merge results from each thread let mut all_imports = HashMap::new(); - for file_result in results { - for (module, locations) in file_result { - all_imports - .entry(module) - .or_insert_with(Vec::new) - .extend(locations); + let mut errors = Vec::new(); + + for (path, file_result) in results { + match file_result { + Ok(file_result) => { + for (module, locations) in file_result { + all_imports + .entry(module) + .or_insert_with(Vec::new) + .extend(locations); + } + } + Err(e) => errors.push((path.to_string(), e)), } } + for (path, error) in errors { + log::warn!( + "Warning: Skipping processing of {} because of the following error: \"{}\".", + path, + error + ); + } + convert_to_python_dict(py, all_imports) } @@ -60,19 +76,12 @@ pub fn get_imports_from_py_file(py: Python, file_path: &PyString) -> PyResult PyResult>> { - let file_content = match read_file(path_str) { - Ok(content) => content, - Err(_) => { - log::warn!("Warning: File {} could not be read. Skipping...", path_str); - return Ok(HashMap::new()); - } - }; + let file_content = read_file(path_str)?; - let ast = get_ast_from_file_content(&file_content, path_str) - .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?; + let ast = parse(&file_content, Mode::Module, path_str) + .map_err(|e| PySyntaxError::new_err(e.to_string()))?; let imported_modules = extract_imports_from_ast(ast); - Ok(convert_imports_with_textranges_to_location_objects( imported_modules, path_str, @@ -80,12 +89,6 @@ fn _get_imports_from_py_file(path_str: &str) -> PyResult PyResult { - parse(file_content, Mode::Module, file_path) - .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", file_path, e))) -} - /// Iterates through an AST to identify and collect import statements, and returns them together with their /// respective TextRange for each occurrence. fn extract_imports_from_ast(ast: Mod) -> HashMap> { diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py index 05dd79a5..96da43b8 100644 --- a/tests/unit/imports/test_extract.py +++ b/tests/unit/imports/test_extract.py @@ -122,19 +122,33 @@ def test_import_parser_file_encodings_ipynb(code_cell_content: list[str], encodi assert get_imported_modules_from_list_of_files([random_file]) == {"foo": [Location(random_file, 1, 0)]} -def test_import_parser_file_encodings_warning(tmp_path: Path, caplog: LogCaptureFixture) -> None: - file_path = Path("file1.py") +def test_import_parser_errors(tmp_path: Path, caplog: LogCaptureFixture) -> None: + file_ok = Path("file_ok.py") + file_with_bad_encoding = Path("file_with_bad_encoding.py") + file_with_syntax_error = Path("file_with_syntax_error.py") with run_within_dir(tmp_path): - # The characters below are represented differently in ISO-8859-1 and UTF-8, so this should raise an error. - with file_path.open("w", encoding="ISO-8859-1") as f: + with file_ok.open("w") as f: + f.write("import black") + + with file_with_bad_encoding.open("w", encoding="ISO-8859-1") as f: f.write("# -*- coding: utf-8 -*-\nprint('ÆØÅ')") - with caplog.at_level(logging.WARNING): - assert get_imported_modules_from_list_of_files([file_path]) == {} + with file_with_syntax_error.open("w") as f: + f.write("invalid_syntax:::") - # //TODO logging from Rust still includes it's own warning and file + line number. Can we get rid of that? - pattern = re.compile( - r"WARNING deptry.imports:imports.rs:\d+ Warning: File file1.py could not be read. Skipping...\n" + with caplog.at_level(logging.WARNING): + assert get_imported_modules_from_list_of_files([ + file_ok, + file_with_bad_encoding, + file_with_syntax_error, + ]) == {"black": [Location(file=Path("file_ok.py"), line=1, column=8)]} + + assert re.search( + r"WARNING deptry.imports:imports.rs:\d+ Warning: Skipping processing of file_with_bad_encoding.py because of the following error: \"OSError: Failed to decode file content with the detected encoding.\".", + caplog.text, + ) + assert re.search( + r"WARNING deptry.imports:imports.rs:\d+ Warning: Skipping processing of file_with_syntax_error.py because of the following error: \"SyntaxError: invalid syntax. Got unexpected token ':' at byte offset 15\".", + caplog.text, ) - assert pattern.search(caplog.text) is not None